package org.biojavax.bio.seq.io;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import org.biojava.bio.program.tagvalue.TagValueParser;
import org.biojava.bio.seq.Feature;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.io.ParseException;
import org.biojava.bio.seq.io.SeqIOListener;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.Location;
import org.biojava.bio.symbol.SimpleSymbolList;
import org.biojava.bio.symbol.Symbol;
import org.biojava.utils.ChangeVetoException;
import org.biojava.utils.xml.PrettyXMLWriter;
import org.biojava.utils.xml.XMLWriter;
import org.biojavax.Comment;
import org.biojavax.CrossRef;
import org.biojavax.DocRef;
import org.biojavax.DocRefAuthor;
import org.biojavax.Namespace;
import org.biojavax.Note;
import org.biojavax.RankedCrossRef;
import org.biojavax.RankedDocRef;
import org.biojavax.RichObjectFactory;
import org.biojavax.SimpleCrossRef;
import org.biojavax.SimpleDocRef;
import org.biojavax.SimpleDocRefAuthor;
import org.biojavax.SimpleNote;
import org.biojavax.SimpleRankedCrossRef;
import org.biojavax.SimpleRankedDocRef;
import org.biojavax.SimpleRichAnnotation;
import org.biojavax.bio.seq.Position;
import org.biojavax.bio.seq.RichFeature;
import org.biojavax.bio.seq.RichLocation;
import org.biojavax.bio.seq.RichSequence;
import org.biojavax.bio.seq.SimplePosition;
import org.biojavax.bio.seq.SimpleRichLocation;
import org.biojavax.bio.seq.io.RichSequenceFormat;
import org.biojavax.bio.taxa.NCBITaxon;
import org.biojavax.bio.taxa.SimpleNCBITaxon;
import org.biojavax.ontology.ComparableTerm;
import org.biojavax.utils.StringTools;
import org.biojavax.utils.XMLTools;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/biojavax/bio/seq/io/INSDseqFormat.class */
public class INSDseqFormat extends RichSequenceFormat.BasicFormat {
    /** Name of this format; also used as the ontology term name returned by {@code Terms.getINSDseqTerm()}. */
    public static final String INSDSEQ_FORMAT = "INSDseq";

    // ---- XML element names: document root and per-record header elements ----
    protected static final String INSDSEQS_GROUP_TAG = "INSDSet";
    protected static final String INSDSEQ_TAG = "INSDSeq";
    protected static final String LOCUS_TAG = "INSDSeq_locus";
    protected static final String LENGTH_TAG = "INSDSeq_length";
    protected static final String TOPOLOGY_TAG = "INSDSeq_topology";
    protected static final String STRANDED_TAG = "INSDSeq_strandedness";
    protected static final String MOLTYPE_TAG = "INSDSeq_moltype";
    protected static final String DIVISION_TAG = "INSDSeq_division";
    protected static final String UPDATE_DATE_TAG = "INSDSeq_update-date";
    protected static final String CREATE_DATE_TAG = "INSDSeq_create-date";
    protected static final String UPDATE_REL_TAG = "INSDSeq_update-release";
    protected static final String CREATE_REL_TAG = "INSDSeq_create-release";
    protected static final String DEFINITION_TAG = "INSDSeq_definition";
    protected static final String DATABASE_XREF_TAG = "INSDSeq_database-reference";
    protected static final String XREF_TAG = "INSDXref";

    // ---- XML element names: accessions, other identifiers, keywords and organism ----
    protected static final String ACCESSION_TAG = "INSDSeq_primary-accession";
    protected static final String ACC_VERSION_TAG = "INSDSeq_accession-version";
    protected static final String SECONDARY_ACCESSIONS_GROUP_TAG = "INSDSeq_secondary-accessions";
    protected static final String SECONDARY_ACCESSION_TAG = "INSDSecondary-accn";
    protected static final String OTHER_SEQIDS_GROUP_TAG = "INSDSeq_other-seqids";
    protected static final String OTHER_SEQID_TAG = "INSDSeqid";
    protected static final String KEYWORDS_GROUP_TAG = "INSDSeq_keywords";
    protected static final String KEYWORD_TAG = "INSDKeyword";
    protected static final String SOURCE_TAG = "INSDSeq_source";
    protected static final String ORGANISM_TAG = "INSDSeq_organism";
    protected static final String TAXONOMY_TAG = "INSDSeq_taxonomy";

    // ---- XML element names: literature references and their cross-references ----
    protected static final String REFERENCES_GROUP_TAG = "INSDSeq_references";
    protected static final String REFERENCE_TAG = "INSDReference";
    protected static final String REFERENCE_LOCATION_TAG = "INSDReference_reference";
    protected static final String REFERENCE_POSITION_TAG = "INSDReference_position";
    protected static final String TITLE_TAG = "INSDReference_title";
    protected static final String JOURNAL_TAG = "INSDReference_journal";
    protected static final String PUBMED_TAG = "INSDReference_pubmed";
    protected static final String XREF_DBNAME_TAG = "INSDXref_dbname";
    protected static final String XREF_ID_TAG = "INSDXref_id";
    protected static final String REMARK_TAG = "INSDReference_remark";
    protected static final String AUTHORS_GROUP_TAG = "INSDReference_authors";
    protected static final String AUTHOR_TAG = "INSDAuthor";
    protected static final String CONSORTIUM_TAG = "INSDReference_consortium";
    protected static final String COMMENT_TAG = "INSDSeq_comment";

    // ---- XML element names: feature table, intervals and qualifiers ----
    protected static final String FEATURES_GROUP_TAG = "INSDSeq_feature-table";
    protected static final String FEATURE_TAG = "INSDFeature";
    protected static final String FEATURE_KEY_TAG = "INSDFeature_key";
    protected static final String FEATURE_LOC_TAG = "INSDFeature_location";
    protected static final String FEATURE_INTERVALS_GROUP_TAG = "INSDFeature_intervals";
    protected static final String FEATURE_INTERVAL_TAG = "INSDInterval";
    protected static final String FEATURE_FROM_TAG = "INSDInterval_from";
    protected static final String FEATURE_TO_TAG = "INSDInterval_to";
    protected static final String FEATURE_POINT_TAG = "INSDInterval_point";
    protected static final String FEATURE_ISCOMP_TAG = "INSDInterval_iscomp";
    protected static final String FEATURE_INTERBP_TAG = "INSDInterval_interbp";
    protected static final String FEATURE_ACCESSION_TAG = "INSDInterval_accession";
    protected static final String FEATURE_OPERATOR_TAG = "INSDFeature_operator";
    protected static final String FEATURE_PARTIAL5_TAG = "INSDFeature_partial5";
    protected static final String FEATURE_PARTIAL3_TAG = "INSDFeature_partial3";
    protected static final String FEATUREQUALS_GROUP_TAG = "INSDFeature_quals";
    protected static final String FEATUREQUAL_TAG = "INSDQualifier";
    protected static final String FEATUREQUAL_NAME_TAG = "INSDQualifier_name";
    protected static final String FEATUREQUAL_VALUE_TAG = "INSDQualifier_value";

    // ---- XML element names: sequence data ----
    protected static final String SEQUENCE_TAG = "INSDSeq_sequence";
    protected static final String CONTIG_TAG = "INSDSeq_contig";

    /**
     * Pattern applied to the value of a {@code db_xref} feature qualifier; the parser
     * reads group 1 as the database name and group 2 as the identifier.
     * No initializer is given here, so it must be assigned in a static initializer
     * that is not part of this view.
     */
    protected static final Pattern dbxp;
    /**
     * Pattern that the second line of an input must fully match for {@code canRead}
     * to accept it. Assigned in a static initializer that is not part of this view.
     */
    protected static final Pattern xmlSchema;
    /** Writer wrapping the format's print stream; created lazily by {@code getXMLWriter()}. */
    private PrintWriter pw;
    /** XML writer layered on {@link #pw}; created lazily by {@code getXMLWriter()}. */
    private XMLWriter xmlWriter;

    /**
     * SAX handler that converts the elements of an INSDSeq record into calls on a
     * {@link RichSeqIOListener}.
     *
     * <p>Character data is accumulated by {@link #characters(char[], int, int)} and
     * consumed (trimmed) when the enclosing element closes in
     * {@link #endElement(String, String, String)}. Multi-element constructs
     * (references, cross-references, features and their qualifiers) are collected in
     * the {@code currRef*}, {@code templ} and {@code currFeatQual} fields until their
     * closing tag is seen.
     *
     * <p>References, comments, features and symbols are skipped when the owning
     * format's corresponding {@code getElide...()} flag is set.
     */
    public class INSDseqHandler extends DefaultHandler {
        private final RichSequenceFormat parent;
        private final SymbolTokenization symParser;
        private final RichSeqIOListener rlistener;
        private Namespace ns;
        /** Text collected since the last closing tag. */
        private final StringBuilder m_currentString;
        private NCBITaxon tax;
        private String organism;
        private String accession;
        private RichFeature.Template templ;
        /** Name of a qualifier whose value has not been seen yet, or null. */
        private String currFeatQual;
        private String currRefLocation;
        private List<SimpleDocRefAuthor> currRefAuthors;
        private String currRefTitle;
        private String currRefJournal;
        private String currRefPubmed;
        private String currRefRemark;
        private String currRefPosition;
        private String currRefXrefDBName;
        private String currRefXrefID;
        private List<CrossRef> currRefXrefs;
        /** Rank counter for cross-references attached to the current feature. */
        private int rcrossrefCount;

        /**
         * @param parent     format whose elide flags control what is parsed
         * @param symParser  tokenization used to turn sequence text into symbols
         * @param rlistener  listener that receives the parsed data
         * @param ns         namespace for the sequence; if null the default namespace
         *                   is substituted when the record starts
         */
        private INSDseqHandler(RichSequenceFormat parent, SymbolTokenization symParser, RichSeqIOListener rlistener, Namespace ns) {
            this.parent = parent;
            this.symParser = symParser;
            this.rlistener = rlistener;
            this.ns = ns;
            this.m_currentString = new StringBuilder();
        }

        /**
         * Starts a sequence, or resets the per-reference / per-xref / per-feature
         * collection state, depending on which element is opening.
         *
         * @throws SAXException wrapping any {@link ParseException} raised by the listener
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if (qName.equals(INSDseqFormat.INSDSEQ_TAG)) {
                try {
                    this.rlistener.startSequence();
                    if (this.ns == null) {
                        this.ns = RichObjectFactory.getDefaultNamespace();
                    }
                    this.rlistener.setNamespace(this.ns);
                } catch (ParseException e) {
                    throw new SAXException(e);
                }
            } else if (qName.equals(INSDseqFormat.REFERENCE_TAG) && !this.parent.getElideReferences()) {
                this.currRefLocation = null;
                this.currRefPosition = null;
                this.currRefAuthors = new ArrayList<SimpleDocRefAuthor>();
                this.currRefTitle = null;
                this.currRefJournal = null;
                this.currRefPubmed = null;
                this.currRefRemark = null;
                this.currRefXrefs = new ArrayList<CrossRef>();
            } else if (qName.equals(INSDseqFormat.XREF_TAG) && !this.parent.getElideReferences()) {
                this.currRefXrefDBName = null;
                this.currRefXrefID = null;
            } else if (qName.equals(INSDseqFormat.FEATURE_TAG) && !this.parent.getElideFeatures()) {
                this.templ = new RichFeature.Template();
                this.templ.annotation = new SimpleRichAnnotation();
                this.templ.sourceTerm = Terms.getINSDseqTerm();
                this.templ.featureRelationshipSet = new TreeSet();
                this.templ.rankedCrossRefs = new TreeSet();
            }
        }

        /**
         * Dispatches on the closing element name and forwards the collected text to
         * the listener or stores it for the enclosing construct.
         *
         * @throws SAXException wrapping a {@link ParseException} for malformed content,
         *         wrapping a {@link NumberFormatException} for non-numeric version,
         *         position or taxon values, or raised directly for contig elements,
         *         which are not supported
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            String val = this.m_currentString.toString().trim();
            try {
                if (qName.equals(INSDseqFormat.LOCUS_TAG)) {
                    this.rlistener.setName(val);
                } else if (qName.equals(INSDseqFormat.ACCESSION_TAG)) {
                    this.accession = val;
                    this.rlistener.setAccession(this.accession);
                } else if (qName.equals(INSDseqFormat.ACC_VERSION_TAG)) {
                    // "ACCESSION.VERSION": the version part is optional.
                    String[] parts = val.split("\\.");
                    this.accession = parts[0];
                    this.rlistener.setAccession(this.accession);
                    if (parts.length > 1) {
                        this.rlistener.setVersion(Integer.parseInt(parts[1]));
                    }
                } else if (qName.equals(INSDseqFormat.SECONDARY_ACCESSION_TAG)) {
                    this.rlistener.addSequenceProperty(Terms.getAdditionalAccessionTerm(), val);
                } else if (qName.equals(INSDseqFormat.OTHER_SEQID_TAG)) {
                    this.rlistener.addSequenceProperty(Terms.getOtherSeqIdTerm(), val);
                } else if (qName.equals(INSDseqFormat.DIVISION_TAG)) {
                    this.rlistener.setDivision(val);
                } else if (qName.equals(INSDseqFormat.MOLTYPE_TAG)) {
                    this.rlistener.addSequenceProperty(Terms.getMolTypeTerm(), val);
                } else if (qName.equals(INSDseqFormat.UPDATE_DATE_TAG)) {
                    this.rlistener.addSequenceProperty(Terms.getDateUpdatedTerm(), val);
                } else if (qName.equals(INSDseqFormat.UPDATE_REL_TAG)) {
                    this.rlistener.addSequenceProperty(Terms.getRelUpdatedTerm(), val);
                } else if (qName.equals(INSDseqFormat.CREATE_DATE_TAG)) {
                    this.rlistener.addSequenceProperty(Terms.getDateCreatedTerm(), val);
                } else if (qName.equals(INSDseqFormat.CREATE_REL_TAG)) {
                    this.rlistener.addSequenceProperty(Terms.getRelCreatedTerm(), val);
                } else if (qName.equals(INSDseqFormat.STRANDED_TAG)) {
                    this.rlistener.addSequenceProperty(Terms.getStrandedTerm(), val);
                } else if (qName.equals(INSDseqFormat.TOPOLOGY_TAG)) {
                    if ("circular".equals(val)) {
                        this.rlistener.setCircular(true);
                    }
                } else if (qName.equals(INSDseqFormat.DEFINITION_TAG)) {
                    this.rlistener.setDescription(val);
                } else if (qName.equals(INSDseqFormat.KEYWORD_TAG)) {
                    this.rlistener.addSequenceProperty(Terms.getKeywordTerm(), val);
                } else if (qName.equals(INSDseqFormat.COMMENT_TAG) && !this.parent.getElideComments()) {
                    this.rlistener.setComment(val);
                } else if (qName.equals(INSDseqFormat.DATABASE_XREF_TAG)) {
                    addDatabaseXref(val);
                } else if (qName.equals(INSDseqFormat.SEQUENCE_TAG) && !this.parent.getElideSymbols()) {
                    try {
                        // Whitespace is replaced with TagValueParser.EMPTY_LINE_EOR (presumably the
                        // empty string - confirm) and '.', '|' and '~' are mapped to the gap character '-'.
                        SimpleSymbolList symbols = new SimpleSymbolList(this.symParser, val.replaceAll("\\s+", TagValueParser.EMPTY_LINE_EOR).replaceAll("[\\.|~]", "-"));
                        this.rlistener.addSymbols(this.symParser.getAlphabet(), (Symbol[]) symbols.toList().toArray(new Symbol[0]), 0, symbols.length());
                    } catch (Exception e) {
                        throw new ParseException(e);
                    }
                } else if (qName.equals(INSDseqFormat.CONTIG_TAG)) {
                    throw new SAXException("Cannot handle contigs yet");
                } else if (qName.equals(INSDseqFormat.REFERENCE_LOCATION_TAG) && !this.parent.getElideReferences()) {
                    this.currRefLocation = val;
                } else if (qName.equals(INSDseqFormat.REFERENCE_POSITION_TAG) && !this.parent.getElideReferences()) {
                    this.currRefPosition = val;
                } else if (qName.equals(INSDseqFormat.AUTHOR_TAG) && !this.parent.getElideReferences()) {
                    this.currRefAuthors.add(new SimpleDocRefAuthor(val, false, false));
                } else if (qName.equals(INSDseqFormat.CONSORTIUM_TAG) && !this.parent.getElideReferences()) {
                    this.currRefAuthors.add(new SimpleDocRefAuthor(val, true, false));
                } else if (qName.equals(INSDseqFormat.TITLE_TAG) && !this.parent.getElideReferences()) {
                    this.currRefTitle = val;
                } else if (qName.equals(INSDseqFormat.JOURNAL_TAG) && !this.parent.getElideReferences()) {
                    this.currRefJournal = val;
                } else if (qName.equals(INSDseqFormat.XREF_DBNAME_TAG) && !this.parent.getElideReferences()) {
                    this.currRefXrefDBName = val;
                } else if (qName.equals(INSDseqFormat.XREF_ID_TAG) && !this.parent.getElideReferences()) {
                    this.currRefXrefID = val;
                } else if (qName.equals(INSDseqFormat.XREF_TAG) && !this.parent.getElideReferences()) {
                    // The list only exists once a reference element has been opened; an xref
                    // seen before that is ignored instead of failing with a NullPointerException.
                    if (this.currRefXrefs != null) {
                        this.currRefXrefs.add((CrossRef) RichObjectFactory.getObject(SimpleCrossRef.class, new Object[]{this.currRefXrefDBName, this.currRefXrefID, Integer.valueOf(0)}));
                    }
                } else if (qName.equals(INSDseqFormat.PUBMED_TAG) && !this.parent.getElideReferences()) {
                    this.currRefPubmed = val;
                } else if (qName.equals(INSDseqFormat.REMARK_TAG) && !this.parent.getElideReferences() && !this.parent.getElideComments()) {
                    this.currRefRemark = val;
                } else if (qName.equals(INSDseqFormat.REFERENCE_TAG) && !this.parent.getElideReferences()) {
                    finishReference();
                } else if (qName.equals(INSDseqFormat.FEATURE_KEY_TAG) && !this.parent.getElideFeatures()) {
                    this.templ.typeTerm = RichObjectFactory.getDefaultOntology().getOrCreateTerm(val);
                } else if (qName.equals(INSDseqFormat.FEATURE_LOC_TAG) && !this.parent.getElideFeatures()) {
                    // The feature is opened on the listener once its location is known.
                    this.templ.location = GenbankLocationParser.parseLocation(this.ns, this.accession, val.replaceAll("\\s+", TagValueParser.EMPTY_LINE_EOR));
                    this.rlistener.startFeature(this.templ);
                    this.rcrossrefCount = 0;
                } else if (qName.equals(INSDseqFormat.FEATUREQUAL_NAME_TAG) && !this.parent.getElideFeatures()) {
                    // A still-pending name means the previous qualifier had no value.
                    flushValuelessQualifier();
                    this.currFeatQual = val;
                } else if (qName.equals(INSDseqFormat.FEATUREQUAL_VALUE_TAG) && !this.parent.getElideFeatures()) {
                    handleQualifierValue(val);
                } else if (qName.equals(INSDseqFormat.FEATURE_TAG) && !this.parent.getElideFeatures()) {
                    // Flush before closing so that a trailing valueless qualifier is attached to
                    // this feature rather than leaking into the next one (or being lost).
                    flushValuelessQualifier();
                    this.rlistener.endFeature();
                } else if (qName.equals(INSDseqFormat.INSDSEQ_TAG)) {
                    this.rlistener.endSequence();
                }
            } catch (ParseException e) {
                throw new SAXException(e);
            } catch (NumberFormatException e) {
                throw new SAXException("Bad numeric value in " + qName + ": " + val, e);
            } finally {
                this.m_currentString.setLength(0);
            }
        }

        /** Appends raw character data to the buffer consumed by the next closing tag. */
        @Override
        public void characters(char[] ch, int start, int length) {
            this.m_currentString.append(ch, start, length);
        }

        /**
         * Parses a {@code dbname;accession[;extra...]} database reference and reports it
         * to the listener; extra parts are attached as additional-accession notes.
         */
        private void addDatabaseXref(String val) throws ParseException {
            String[] parts = val.split(";");
            if (parts.length < 2) {
                throw new ParseException("Bad database reference found: " + val);
            }
            CrossRef crossRef = (CrossRef) RichObjectFactory.getObject(SimpleCrossRef.class, new Object[]{parts[0].trim(), parts[1].trim(), Integer.valueOf(0)});
            for (int i = 2; i < parts.length; i++) {
                try {
                    crossRef.getRichAnnotation().addNote(new SimpleNote(Terms.getAdditionalAccessionTerm(), parts[i].trim(), i - 1));
                } catch (ChangeVetoException e) {
                    ParseException parseException = new ParseException("Could not annotate identifier terms");
                    parseException.initCause(e);
                    throw parseException;
                }
            }
            this.rlistener.setRankedCrossRef(new SimpleRankedCrossRef(crossRef, 0));
        }

        /**
         * Picks the cross-reference for the reference being closed: an explicit pubmed
         * id wins; otherwise the last collected xref named "pubmed", then "doi", then
         * any other. Returns null when there is none.
         */
        private CrossRef chooseReferenceCrossRef() throws ParseException {
            if (this.currRefPubmed != null) {
                return (CrossRef) RichObjectFactory.getObject(SimpleCrossRef.class, new Object[]{RichSequence.Terms.PUBMED_KEY, this.currRefPubmed, Integer.valueOf(0)});
            }
            CrossRef pubmed = null;
            CrossRef doi = null;
            CrossRef other = null;
            for (CrossRef candidate : this.currRefXrefs) {
                if (candidate.getDbname().equals("pubmed")) {
                    pubmed = candidate;
                } else if (candidate.getDbname().equals("doi")) {
                    doi = candidate;
                } else {
                    other = candidate;
                }
            }
            if (pubmed != null) {
                return pubmed;
            }
            return doi != null ? doi : other;
        }

        /** Builds the document reference from the collected fields and reports it to the listener. */
        private void finishReference() throws ParseException {
            CrossRef crossRef = chooseReferenceCrossRef();
            try {
                DocRef docRef = (DocRef) RichObjectFactory.getObject(SimpleDocRef.class, new Object[]{this.currRefAuthors, this.currRefJournal, this.currRefTitle});
                if (crossRef != null) {
                    docRef.setCrossref(crossRef);
                }
                docRef.setRemark(this.currRefRemark);
                if (this.currRefPosition == null) {
                    this.rlistener.setRankedDocRef(new SimpleRankedDocRef(docRef, Integer.valueOf(1), Integer.valueOf(1), 0));
                    return;
                }
                RichLocation richLocation;
                if (this.currRefPosition.equals(TagValueParser.EMPTY_LINE_EOR) || this.currRefPosition.equals("sites")) {
                    richLocation = RichLocation.EMPTY_LOCATION;
                } else {
                    // Positions look like "a..b; c..d; e": one ranked member per entry.
                    ArrayList members = new ArrayList();
                    String[] ranges = this.currRefPosition.split(";\\s+");
                    for (int rank = 0; rank < ranges.length; rank++) {
                        String[] bounds = ranges[rank].split("\\.\\.");
                        if (bounds.length > 1) {
                            members.add(new SimpleRichLocation(new SimplePosition(Integer.parseInt(bounds[0])), new SimplePosition(Integer.parseInt(bounds[1])), rank));
                        } else {
                            members.add(new SimpleRichLocation(new SimplePosition(Integer.parseInt(bounds[0])), rank));
                        }
                    }
                    richLocation = RichLocation.Tools.construct(members);
                }
                this.rlistener.setRankedDocRef(new SimpleRankedDocRef(docRef, richLocation, 0));
            } catch (ChangeVetoException e) {
                throw new ParseException(e);
            }
        }

        /** Reports a pending qualifier that never received a value, with a null value. */
        private void flushValuelessQualifier() throws ParseException {
            if (this.currFeatQual != null) {
                this.rlistener.addFeatureProperty(RichObjectFactory.getDefaultOntology().getOrCreateTerm(this.currFeatQual), null);
                this.currFeatQual = null;
            }
        }

        /**
         * Handles the value of the pending qualifier: {@code db_xref} values become the
         * taxon or a ranked cross-reference on the current feature, {@code organism}
         * names the taxon, and everything else becomes a feature property.
         */
        private void handleQualifierValue(String val) throws ParseException {
            if (this.currFeatQual == null) {
                throw new ParseException("Qualifier value found without a qualifier name: " + val);
            }
            if (this.currFeatQual.equalsIgnoreCase("db_xref")) {
                Matcher matcher = INSDseqFormat.dbxp.matcher(val);
                if (!matcher.matches()) {
                    throw new ParseException("Bad dbxref found: " + val);
                }
                String dbName = matcher.group(1);
                String dbId = matcher.group(2);
                if (dbName.equalsIgnoreCase("taxon")) {
                    this.tax = (NCBITaxon) RichObjectFactory.getObject(SimpleNCBITaxon.class, new Object[]{Integer.valueOf(dbId)});
                    this.rlistener.setTaxon(this.tax);
                    try {
                        // The organism qualifier may have been seen before the taxon id.
                        if (this.organism != null) {
                            this.tax.addName(NCBITaxon.SCIENTIFIC, this.organism);
                        }
                    } catch (ChangeVetoException e) {
                        throw new ParseException(e);
                    }
                } else {
                    try {
                        CrossRef crossRef = (CrossRef) RichObjectFactory.getObject(SimpleCrossRef.class, new Object[]{dbName, dbId, Integer.valueOf(0)});
                        this.rcrossrefCount++;
                        this.rlistener.getCurrentFeature().addRankedCrossRef(new SimpleRankedCrossRef(crossRef, this.rcrossrefCount));
                    } catch (ChangeVetoException e) {
                        throw new ParseException(e);
                    }
                }
            } else if (this.currFeatQual.equalsIgnoreCase("organism")) {
                try {
                    this.organism = val;
                    // The taxon id may have been seen before the organism qualifier.
                    if (this.tax != null) {
                        this.tax.addName(NCBITaxon.SCIENTIFIC, this.organism);
                    }
                } catch (ChangeVetoException e) {
                    throw new ParseException(e);
                }
            } else {
                String propertyValue = val;
                if (this.currFeatQual.equalsIgnoreCase("translation")) {
                    propertyValue = val.replaceAll("\\s+", TagValueParser.EMPTY_LINE_EOR);
                }
                this.rlistener.addFeatureProperty(RichObjectFactory.getDefaultOntology().getOrCreateTerm(this.currFeatQual), propertyValue);
            }
            this.currFeatQual = null;
        }
    }

    /**
     * Ontology terms specific to the INSDseq format, in addition to those inherited
     * from {@link RichSequence.Terms}. Each getter looks the term up by name in the
     * default ontology, creating it if necessary.
     */
    public static class Terms extends RichSequence.Terms {
        /**
         * @return the term named "OtherSeqID" from the default ontology
         */
        public static ComparableTerm getOtherSeqIdTerm() {
            final ComparableTerm otherSeqId = RichObjectFactory.getDefaultOntology().getOrCreateTerm("OtherSeqID");
            return otherSeqId;
        }

        /**
         * @return the term named after {@link INSDseqFormat#INSDSEQ_FORMAT} from the
         *         default ontology
         */
        public static ComparableTerm getINSDseqTerm() {
            final ComparableTerm formatTerm = RichObjectFactory.getDefaultOntology().getOrCreateTerm(INSDseqFormat.INSDSEQ_FORMAT);
            return formatTerm;
        }
    }

    /**
     * Decides whether a file looks like INSDseq XML by matching its second line
     * against {@link #xmlSchema}. The first line is read and discarded.
     *
     * @param file the file to inspect
     * @return true if a second line exists and fully matches the pattern
     * @throws IOException if the file cannot be opened or read
     */
    @Override
    public boolean canRead(File file) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            reader.readLine();
            String secondLine = reader.readLine();
            return secondLine != null && xmlSchema.matcher(secondLine).matches();
        } finally {
            // Close even when reading fails so the file handle is not leaked.
            reader.close();
        }
    }

    /**
     * Returns the DNA parser from {@code RichSequence.IOTools}; the file argument is
     * not inspected.
     *
     * @param file ignored
     * @return the DNA symbol tokenization
     * @throws IOException declared by the overridden method; not thrown by this code
     */
    @Override
    public SymbolTokenization guessSymbolTokenization(File file) throws IOException {
        final SymbolTokenization dnaParser = RichSequence.IOTools.getDNAParser();
        return dnaParser;
    }

    /**
     * Decides whether a stream looks like INSDseq XML by matching its second line
     * against {@link #xmlSchema}, then rewinds the stream to where it started.
     *
     * @param bufferedInputStream the stream to peek into; it is marked on entry and
     *        reset before returning, including when reading fails
     * @return true if a second line exists and fully matches the pattern
     * @throws IOException if reading or resetting the stream fails
     */
    @Override
    public boolean canRead(BufferedInputStream bufferedInputStream) throws IOException {
        bufferedInputStream.mark(2000);
        try {
            // The reader is intentionally not closed: closing it would close the caller's stream.
            BufferedReader reader = new BufferedReader(new InputStreamReader(bufferedInputStream));
            reader.readLine();
            String secondLine = reader.readLine();
            return secondLine != null && xmlSchema.matcher(secondLine).matches();
        } finally {
            // Rewind even if reading threw, so the caller can still hand the stream to another format.
            // NOTE(review): the readers may read ahead further than the 2000-byte mark limit, in
            // which case reset() can itself fail - confirm the limit is adequate for expected inputs.
            bufferedInputStream.reset();
        }
    }

    /**
     * Returns the DNA parser from {@code RichSequence.IOTools}; the stream argument is
     * neither read nor marked.
     *
     * @param bufferedInputStream ignored
     * @return the DNA symbol tokenization
     * @throws IOException declared by the overridden method; not thrown by this code
     */
    @Override
    public SymbolTokenization guessSymbolTokenization(BufferedInputStream bufferedInputStream) throws IOException {
        final SymbolTokenization dnaParser = RichSequence.IOTools.getDNAParser();
        return dnaParser;
    }

    /**
     * Reads one sequence by delegating to
     * {@link #readRichSequence(BufferedReader, SymbolTokenization, RichSeqIOListener, Namespace)}
     * with a null namespace.
     *
     * @param seqIOListener must be a {@link RichSeqIOListener}
     * @return the result of the delegated call
     * @throws IllegalArgumentException if the listener is not a {@link RichSeqIOListener}
     */
    @Override
    public boolean readSequence(BufferedReader bufferedReader, SymbolTokenization symbolTokenization, SeqIOListener seqIOListener) throws IllegalSymbolException, IOException, ParseException {
        if (!(seqIOListener instanceof RichSeqIOListener)) {
            throw new IllegalArgumentException("Only accepting RichSeqIOListeners today");
        }
        RichSeqIOListener richListener = (RichSeqIOListener) seqIOListener;
        return readRichSequence(bufferedReader, symbolTokenization, richListener, null);
    }

    /**
     * Reads one {@code INSDSeq} element from the reader via
     * {@code XMLTools.readXMLChunk}, feeding it through a new {@link INSDseqHandler}
     * bound to the given listener and namespace.
     *
     * @return the value returned by {@code XMLTools.readXMLChunk}
     * @throws ParseException wrapping any parser-configuration or SAX failure
     */
    @Override
    public boolean readRichSequence(BufferedReader bufferedReader, SymbolTokenization symbolTokenization, RichSeqIOListener richSeqIOListener, Namespace namespace) throws IllegalSymbolException, IOException, ParseException {
        INSDseqHandler handler = new INSDseqHandler(this, symbolTokenization, richSeqIOListener, namespace);
        boolean chunkResult;
        try {
            chunkResult = XMLTools.readXMLChunk(bufferedReader, handler, INSDSEQ_TAG);
        } catch (ParserConfigurationException configProblem) {
            throw new ParseException(configProblem);
        } catch (SAXException saxProblem) {
            throw new ParseException(saxProblem);
        }
        return chunkResult;
    }

    /**
     * Returns the shared XML writer, creating it (and the underlying print writer
     * around the current print stream) on first use.
     */
    private XMLWriter getXMLWriter() {
        if (this.xmlWriter != null) {
            return this.xmlWriter;
        }
        this.pw = new PrintWriter(getPrintStream());
        this.xmlWriter = new PrettyXMLWriter(this.pw);
        return this.xmlWriter;
    }

    /**
     * Writes the XML declaration and DOCTYPE, then opens the enclosing
     * {@code INSDSet} element.
     *
     * @throws IOException if the writer fails
     */
    @Override
    public void beginWriting() throws IOException {
        final String xmlDeclaration = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>";
        final String docType = "<!DOCTYPE INSDSeq PUBLIC \"-//EMBL-EBI//INSD INSDSeq/EN\" \"http://www.insdc.org/files/documents/INSD_V1.4.dtd\">";
        XMLWriter writer = getXMLWriter();
        writer.printRaw(xmlDeclaration);
        writer.printRaw(docType);
        writer.openTag(INSDSEQS_GROUP_TAG);
    }

    /**
     * Closes the enclosing {@code INSDSet} element and flushes the print writer.
     *
     * @throws IOException if the writer fails
     */
    @Override
    public void finishWriting() throws IOException {
        XMLWriter writer = getXMLWriter();
        writer.closeTag(INSDSEQS_GROUP_TAG);
        // getXMLWriter() has created pw by this point if it did not exist yet.
        this.pw.flush();
    }

    /**
     * Writes a sequence in the default namespace.
     *
     * @param sequence    the sequence to write
     * @param printStream stream to write to; used only if no print stream has been
     *                    set on this format yet
     * @throws IOException if writing fails
     */
    @Override
    public void writeSequence(Sequence sequence, PrintStream printStream) throws IOException {
        if (getPrintStream() == null) {
            // Previously this re-assigned the (null) current stream to itself, so the
            // caller's stream was silently ignored.
            setPrintStream(printStream);
        }
        writeSequence(sequence, RichObjectFactory.getDefaultNamespace());
    }

    /**
     * Writes a sequence in the default namespace, after checking that the requested
     * format name is this format's default.
     *
     * @param sequence    the sequence to write
     * @param str         requested format name; must equal {@code getDefaultFormat()}
     * @param printStream stream to write to; used only if no print stream has been
     *                    set on this format yet
     * @throws IllegalArgumentException if the format name is not the default format
     * @throws IOException if writing fails
     */
    @Override
    public void writeSequence(Sequence sequence, String str, PrintStream printStream) throws IOException {
        if (getPrintStream() == null) {
            // Previously this re-assigned the (null) current stream to itself, so the
            // caller's stream was silently ignored.
            setPrintStream(printStream);
        }
        if (!str.equals(getDefaultFormat())) {
            throw new IllegalArgumentException("Unknown format: " + str);
        }
        writeSequence(sequence, RichObjectFactory.getDefaultNamespace());
    }

    /**
     * Writes one sequence as a single {@code INSDSEQ_TAG} element through the
     * writer returned by {@code getXMLWriter()}, then flushes {@code pw}.
     *
     * <p>Elements are emitted in this order: locus, length, strandedness,
     * molecule type, topology, division, update/create dates and releases,
     * definition, accession.version, other sequence ids, secondary accessions,
     * keywords, source/organism/taxonomy, references, comment, database
     * cross-reference, features and finally the sequence string. Optional
     * values are skipped when absent.</p>
     *
     * <p>The document references of the sequence are only read; their author
     * lists are never modified by this method.</p>
     *
     * @param sequence  the sequence to write; it is enriched via
     *                  {@code RichSequence.Tools.enrich} when it is not already
     *                  a {@link RichSequence}
     * @param namespace not consulted by this implementation
     * @throws IOException if the underlying writer fails, or if the sequence
     *                     cannot be enriched (the veto is kept as the cause)
     */
    @Override // org.biojavax.bio.seq.io.RichSequenceFormat
    public void writeSequence(Sequence sequence, Namespace namespace) throws IOException {
        try {
            RichSequence rs = sequence instanceof RichSequence
                    ? (RichSequence) sequence
                    : RichSequence.Tools.enrich(sequence);

            // Header values are carried as annotation notes; sort them by term.
            Set<Note> notes = rs.getNoteSet();
            List<String> secondaryAccessions = new ArrayList<String>();
            List<String> otherSeqIds = new ArrayList<String>();
            List<String> keywords = new ArrayList<String>();
            String stranded = null;
            String dateUpdated = null;
            String dateCreated = null;
            String releaseUpdated = null;
            String releaseCreated = null;
            // The alphabet name is the fallback; a mol-type note overrides it.
            String molType = rs.getAlphabet().getName();
            for (Note note : notes) {
                if (note.getTerm().equals(Terms.getStrandedTerm())) {
                    stranded = note.getValue();
                } else if (note.getTerm().equals(Terms.getDateUpdatedTerm())) {
                    dateUpdated = note.getValue();
                } else if (note.getTerm().equals(Terms.getDateCreatedTerm())) {
                    dateCreated = note.getValue();
                } else if (note.getTerm().equals(Terms.getRelUpdatedTerm())) {
                    releaseUpdated = note.getValue();
                } else if (note.getTerm().equals(Terms.getRelCreatedTerm())) {
                    releaseCreated = note.getValue();
                } else if (note.getTerm().equals(Terms.getMolTypeTerm())) {
                    molType = note.getValue();
                } else if (note.getTerm().equals(Terms.getAdditionalAccessionTerm())) {
                    secondaryAccessions.add(note.getValue());
                } else if (note.getTerm().equals(Terms.getOtherSeqIdTerm())) {
                    otherSeqIds.add(note.getValue());
                } else if (note.getTerm().equals(Terms.getKeywordTerm())) {
                    keywords.add(note.getValue());
                }
            }

            XMLWriter xml = getXMLWriter();
            xml.openTag(INSDSEQ_TAG);

            writeTextElement(xml, LOCUS_TAG, rs.getName());
            // EMPTY_LINE_EOR only serves as a concatenation prefix here;
            // presumably it is the empty string -- confirm in TagValueParser.
            writeTextElement(xml, LENGTH_TAG, TagValueParser.EMPTY_LINE_EOR + rs.length());
            if (stranded != null) {
                writeTextElement(xml, STRANDED_TAG, stranded);
            }
            if (molType != null) {
                writeTextElement(xml, MOLTYPE_TAG, molType);
            }
            writeTextElement(xml, TOPOLOGY_TAG, rs.getCircular() ? "circular" : "linear");
            if (rs.getDivision() != null) {
                writeTextElement(xml, DIVISION_TAG, rs.getDivision());
            }
            // NOTE(review): unlike its neighbours the update date is written even
            // when no such note exists (value is then null). Left unchanged
            // because the element may be mandatory in the target schema -- confirm.
            writeTextElement(xml, UPDATE_DATE_TAG, dateUpdated);
            if (dateCreated != null) {
                writeTextElement(xml, CREATE_DATE_TAG, dateCreated);
            }
            if (releaseUpdated != null) {
                writeTextElement(xml, UPDATE_REL_TAG, releaseUpdated);
            }
            if (releaseCreated != null) {
                writeTextElement(xml, CREATE_REL_TAG, releaseCreated);
            }
            if (rs.getDescription() != null) {
                writeTextElement(xml, DEFINITION_TAG, rs.getDescription());
            }
            writeTextElement(xml, ACC_VERSION_TAG,
                    rs.getAccession() + Position.IN_RANGE + rs.getVersion());

            writeTextElementGroup(xml, OTHER_SEQIDS_GROUP_TAG, OTHER_SEQID_TAG, otherSeqIds);
            writeTextElementGroup(xml, SECONDARY_ACCESSIONS_GROUP_TAG, SECONDARY_ACCESSION_TAG,
                    secondaryAccessions);
            writeTextElementGroup(xml, KEYWORDS_GROUP_TAG, KEYWORD_TAG, keywords);

            NCBITaxon taxon = rs.getTaxon();
            if (taxon != null) {
                writeTextElement(xml, SOURCE_TAG, taxon.getDisplayName());
                writeTextElement(xml, ORGANISM_TAG, organismNameFrom(taxon.getDisplayName()));
                // The final character of the hierarchy string is dropped
                // (presumably a trailing terminator -- confirm); an empty
                // string is written as-is instead of failing in substring().
                String hierarchy = taxon.getNameHierarchy();
                if (hierarchy.length() > 0) {
                    hierarchy = hierarchy.substring(0, hierarchy.length() - 1);
                }
                writeTextElement(xml, TAXONOMY_TAG, hierarchy);
            }

            if (!rs.getRankedDocRefs().isEmpty()) {
                xml.openTag(REFERENCES_GROUP_TAG);
                for (RankedDocRef rankedRef : rs.getRankedDocRefs()) {
                    writeReferenceElement(xml, rankedRef);
                }
                xml.closeTag(REFERENCES_GROUP_TAG);
            }

            if (!rs.getComments().isEmpty()) {
                // All comments share one element, one comment per println call.
                xml.openTag(COMMENT_TAG);
                Iterator<Comment> comments = rs.getComments().iterator();
                while (comments.hasNext()) {
                    xml.println(comments.next().getComment());
                }
                xml.closeTag(COMMENT_TAG);
            }

            if (!rs.getRankedCrossRefs().isEmpty()) {
                // Only the first cross-reference returned by the iterator is written.
                CrossRef firstXref = rs.getRankedCrossRefs().iterator().next().getCrossRef();
                Set<Note> xrefNotes = firstXref.getNoteSet();
                StringBuilder xrefText = new StringBuilder();
                // Locale.ROOT keeps the database name stable under locales with
                // special casing rules (e.g. Turkish dotted/dotless i).
                xrefText.append(firstXref.getDbname().toUpperCase(java.util.Locale.ROOT));
                xrefText.append("; ");
                xrefText.append(firstXref.getAccession());
                for (Note xrefNote : xrefNotes) {
                    if (xrefNote.getTerm().equals(Terms.getAdditionalAccessionTerm())) {
                        xrefText.append("; ");
                        xrefText.append(xrefNote.getValue());
                    }
                }
                writeTextElement(xml, DATABASE_XREF_TAG, xrefText.toString());
            }

            if (!rs.getFeatureSet().isEmpty()) {
                xml.openTag(FEATURES_GROUP_TAG);
                Iterator<Feature> features = rs.getFeatureSet().iterator();
                while (features.hasNext()) {
                    writeFeatureElement(xml, (RichFeature) features.next(), taxon);
                }
                xml.closeTag(FEATURES_GROUP_TAG);
            }

            xml.openTag(SEQUENCE_TAG);
            for (String line : StringTools.wordWrap(rs.seqString(), "\\s+", getLineWidth())) {
                xml.println(line);
            }
            xml.closeTag(SEQUENCE_TAG);

            xml.closeTag(INSDSEQ_TAG);
            this.pw.flush();
        } catch (ChangeVetoException e) {
            throw new IOException("Unable to enrich sequence", e);
        }
    }

    /** Writes {@code <tag>value</tag>} as an open/print/close triple. */
    private static void writeTextElement(XMLWriter xml, String tag, String value) throws IOException {
        xml.openTag(tag);
        xml.print(value);
        xml.closeTag(tag);
    }

    /** Writes a group element holding one item element per value; writes nothing for an empty list. */
    private static void writeTextElementGroup(XMLWriter xml, String groupTag, String itemTag,
            List<String> values) throws IOException {
        if (values.isEmpty()) {
            return;
        }
        xml.openTag(groupTag);
        for (String value : values) {
            writeTextElement(xml, itemTag, value);
        }
        xml.closeTag(groupTag);
    }

    /** Returns the trimmed part of a taxon display name that precedes the first '(' (the whole name if there is none). */
    private static String organismNameFrom(String displayName) {
        int paren = displayName.indexOf('(');
        String base = paren > -1 ? displayName.substring(0, paren) : displayName;
        return base.trim();
    }

    /** Tells whether the position has a non-null type equal to {@code Position.BETWEEN_BASES}. */
    private static boolean isBetweenBases(Position position) {
        return position.getType() != null && position.getType().equals(Position.BETWEEN_BASES);
    }

    /** Writes one reference element: rank, optional position, authors, consortium, title, journal, cross-reference, remark. */
    private void writeReferenceElement(XMLWriter xml, RankedDocRef rankedRef) throws IOException {
        xml.openTag(REFERENCE_TAG);
        DocRef docRef = rankedRef.getDocumentReference();

        writeTextElement(xml, REFERENCE_LOCATION_TAG, Integer.toString(rankedRef.getRank()));

        RichLocation refLocation = rankedRef.getLocation();
        if (!refLocation.equals(RichLocation.EMPTY_LOCATION)) {
            xml.openTag(REFERENCE_POSITION_TAG);
            Iterator<Location> blocks = refLocation.blockIterator();
            while (blocks.hasNext()) {
                RichLocation block = (RichLocation) blocks.next();
                xml.print(block.getMin() + ".." + block.getMax());
                if (blocks.hasNext()) {
                    xml.print("; ");
                }
            }
            xml.closeTag(REFERENCE_POSITION_TAG);
        }

        // Individual authors go inside the group; the first consortium entry is
        // remembered and written after it. The list itself is left untouched so
        // that writing never alters (or fails on) the document reference.
        xml.openTag(AUTHORS_GROUP_TAG);
        List<DocRefAuthor> authors = docRef.getAuthorList();
        DocRefAuthor consortium = null;
        for (DocRefAuthor author : authors) {
            if (!author.isConsortium()) {
                writeTextElement(xml, AUTHOR_TAG, author.getName());
            } else if (consortium == null) {
                consortium = author;
            }
        }
        xml.closeTag(AUTHORS_GROUP_TAG);
        if (consortium != null) {
            writeTextElement(xml, CONSORTIUM_TAG, consortium.getName());
        }

        if (docRef.getTitle() != null) {
            writeTextElement(xml, TITLE_TAG, docRef.getTitle());
        }
        writeTextElement(xml, JOURNAL_TAG, docRef.getLocation());

        CrossRef xref = docRef.getCrossref();
        if (xref != null) {
            if (xref.getDbname().equals(RichSequence.Terms.PUBMED_KEY)) {
                writeTextElement(xml, PUBMED_TAG, xref.getAccession());
            } else {
                xml.openTag(XREF_TAG);
                writeTextElement(xml, XREF_DBNAME_TAG, xref.getDbname());
                writeTextElement(xml, XREF_ID_TAG, xref.getAccession());
                xml.closeTag(XREF_TAG);
            }
        }

        if (docRef.getRemark() != null) {
            writeTextElement(xml, REMARK_TAG, docRef.getRemark());
        }
        xml.closeTag(REFERENCE_TAG);
    }

    /** Writes one feature element: key, location string, per-block intervals, then qualifiers. */
    private void writeFeatureElement(XMLWriter xml, RichFeature feature, NCBITaxon taxon) throws IOException {
        xml.openTag(FEATURE_TAG);
        writeTextElement(xml, FEATURE_KEY_TAG, feature.getTypeTerm().getName());
        writeTextElement(xml, FEATURE_LOC_TAG,
                GenbankLocationParser.writeLocation((RichLocation) feature.getLocation()));

        xml.openTag(FEATURE_INTERVALS_GROUP_TAG);
        RichLocation location = (RichLocation) feature.getLocation();
        boolean firstBlock = true;
        boolean partialStart = false;
        boolean partialEnd = false;
        ComparableTerm operator = location.getTerm();
        Iterator<Location> blocks = location.blockIterator();
        while (blocks.hasNext()) {
            xml.openTag(FEATURE_INTERVAL_TAG);
            RichLocation block = (RichLocation) blocks.next();
            if (block.getMin() == block.getMax()) {
                writeTextElement(xml, FEATURE_POINT_TAG, TagValueParser.EMPTY_LINE_EOR + block.getMin());
            } else {
                writeTextElement(xml, FEATURE_FROM_TAG, TagValueParser.EMPTY_LINE_EOR + block.getMin());
                writeTextElement(xml, FEATURE_TO_TAG, TagValueParser.EMPTY_LINE_EOR + block.getMax());
            }
            boolean complement = block.getStrand().equals(RichLocation.Strand.NEGATIVE_STRAND);
            boolean betweenBases = isBetweenBases(block.getMinPosition())
                    || isBetweenBases(block.getMaxPosition());
            // Fuzziness counts only at the outer ends: the start of the first
            // block and the end of the last block.
            if (firstBlock && block.getMinPosition().getFuzzyStart()) {
                partialStart = true;
            }
            if (!blocks.hasNext() && block.getMaxPosition().getFuzzyEnd()) {
                partialEnd = true;
            }
            firstBlock = false;
            writeTextElement(xml, FEATURE_ISCOMP_TAG, TagValueParser.EMPTY_LINE_EOR + complement);
            writeTextElement(xml, FEATURE_INTERBP_TAG, TagValueParser.EMPTY_LINE_EOR + betweenBases);
            writeTextElement(xml, FEATURE_ACCESSION_TAG,
                    ((RichSequence) feature.getSequence()).getAccession());
            xml.closeTag(FEATURE_INTERVAL_TAG);
        }
        // NOTE(review): operator and partial flags are emitted inside the
        // intervals group, before it is closed. Kept as found; confirm against
        // the INSDSeq schema and the reader in this class before moving them.
        if (operator != null) {
            writeTextElement(xml, FEATURE_OPERATOR_TAG, operator.getName());
        }
        writeTextElement(xml, FEATURE_PARTIAL5_TAG, TagValueParser.EMPTY_LINE_EOR + partialStart);
        writeTextElement(xml, FEATURE_PARTIAL3_TAG, TagValueParser.EMPTY_LINE_EOR + partialEnd);
        xml.closeTag(FEATURE_INTERVALS_GROUP_TAG);

        xml.openTag(FEATUREQUALS_GROUP_TAG);
        for (Note qualifier : feature.getNoteSet()) {
            xml.openTag(FEATUREQUAL_TAG);
            writeTextElement(xml, FEATUREQUAL_NAME_TAG,
                    TagValueParser.EMPTY_LINE_EOR + qualifier.getTerm().getName());
            xml.openTag(FEATUREQUAL_VALUE_TAG);
            String value = qualifier.getValue();
            if (value != null && !value.equals(TagValueParser.EMPTY_LINE_EOR)) {
                if (qualifier.getTerm().getName().equalsIgnoreCase("translation")) {
                    // Translations are wrapped to the configured line width.
                    for (String line : StringTools.wordWrap(value, "\\s+", getLineWidth())) {
                        xml.println(line);
                    }
                } else {
                    xml.print(value);
                }
            }
            xml.closeTag(FEATUREQUAL_VALUE_TAG);
            xml.closeTag(FEATUREQUAL_TAG);
        }
        if (feature.getType().equalsIgnoreCase("source") && taxon != null) {
            // Source features additionally carry the taxon id and organism name.
            writeQualifierElement(xml, "db_xref", "taxon:" + taxon.getNCBITaxID());
            String organism = taxon.getDisplayName();
            int paren = organism.indexOf('(');
            if (paren > -1) {
                organism = organism.substring(0, paren).trim();
            }
            writeQualifierElement(xml, "organism", organism);
        }
        Iterator<RankedCrossRef> xrefs = feature.getRankedCrossRefs().iterator();
        while (xrefs.hasNext()) {
            CrossRef xref = xrefs.next().getCrossRef();
            writeQualifierElement(xml, "db_xref", xref.getDbname() + ":" + xref.getAccession());
        }
        xml.closeTag(FEATUREQUALS_GROUP_TAG);
        xml.closeTag(FEATURE_TAG);
    }

    /** Writes one qualifier element made of a name element and a value element. */
    private void writeQualifierElement(XMLWriter xml, String name, String value) throws IOException {
        xml.openTag(FEATUREQUAL_TAG);
        writeTextElement(xml, FEATUREQUAL_NAME_TAG, name);
        writeTextElement(xml, FEATUREQUAL_VALUE_TAG, value);
        xml.closeTag(FEATUREQUAL_TAG);
    }

    /**
     * Returns the default format name of this class.
     *
     * @return the value of the {@code INSDSEQ_FORMAT} constant
     */
    @Override // org.biojava.bio.seq.io.SequenceFormat
    public String getDefaultFormat() {
        return INSDSEQ_FORMAT;
    }

    // Class initialisation: registers INSDseqFormat with RichSequence.IOTools
    // and compiles the regular expressions stored in the static fields dbxp
    // and xmlSchema. Statement order is kept exactly as found.
    static {
        RichSequence.IOTools.registerFormat(INSDseqFormat.class);
        // Anchored at both ends: group 1 is one or more non-colon characters,
        // then a ':', then group 2 is one or more non-whitespace characters
        // (presumably "dbname:accession" cross-references -- confirm at use site).
        dbxp = Pattern.compile("^([^:]+):(\\S+)$");
        // Matches text containing the literal URL
        // http://www.ebi.ac.uk/dtd/INSD_INSDSeq.dtd with anything before or after
        // it; no DOTALL flag is set, so '.' does not match line terminators.
        xmlSchema = Pattern.compile(".*http://www\\.ebi\\.ac\\.uk/dtd/INSD_INSDSeq\\.dtd.*");
    }
}
