package org.molgenis.data.annotation.entity.impl;

import com.google.common.collect.Iterators;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.molgenis.MolgenisFieldTypes;
import org.molgenis.data.AttributeMetaData;
import org.molgenis.data.Entity;
import org.molgenis.data.EntityMetaData;
import org.molgenis.data.MolgenisDataException;
import org.molgenis.data.annotation.AbstractRepositoryAnnotator;
import org.molgenis.data.annotation.CmdLineAnnotatorSettingsConfigurer;
import org.molgenis.data.annotation.RepositoryAnnotator;
import org.molgenis.data.annotation.entity.AnnotatorInfo;
import org.molgenis.data.annotation.impl.cmdlineannotatorsettingsconfigurer.SingleFileLocationCmdLineAnnotatorSettingsConfigurer;
import org.molgenis.data.annotation.utils.JarRunner;
import org.molgenis.data.annotation.utils.JarRunnerImpl;
import org.molgenis.data.annotator.websettings.SnpEffAnnotatorSettings;
import org.molgenis.data.support.DefaultAttributeMetaData;
import org.molgenis.data.support.DefaultEntityMetaData;
import org.molgenis.data.support.MapEntity;
import org.molgenis.data.vcf.VcfRepository;
import org.molgenis.security.core.runas.RunAsSystemProxy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

@Configuration
/* loaded from: input_file:org/molgenis/data/annotation/entity/impl/SnpEffAnnotator.class */
public class SnpEffAnnotator {
    private static final Logger LOG = LoggerFactory.getLogger(SnpEffAnnotator.class);
    public static final String NAME = "snpEff";
    public static final String ANNOTATION = "Annotation";
    public static final String PUTATIVE_IMPACT = "Putative_impact";
    public static final String GENE_NAME = "Gene_Name";
    public static final String GENE_ID = "Gene_ID";
    public static final String FEATURE_TYPE = "Feature_type";
    public static final String FEATURE_ID = "Feature_ID";
    public static final String TRANSCRIPT_BIOTYPE = "Transcript_biotype";
    public static final String RANK_TOTAL = "Rank_total";
    public static final String HGVS_C = "HGVS_c";
    public static final String HGVS_P = "HGVS_p";
    public static final String C_DNA_POSITION = "cDNA_position";
    public static final String CDS_POSITION = "CDS_position";
    public static final String PROTEIN_POSITION = "Protein_position";
    public static final String DISTANCE_TO_FEATURE = "Distance_to_feature";
    public static final String ERRORS = "Errors";
    public static final String LOF = "LOF";
    public static final String NMD = "NMD";

    @Autowired
    private JarRunner jarRunner;

    @Autowired
    private Entity snpEffAnnotatorSettings;

    /* loaded from: input_file:org/molgenis/data/annotation/entity/impl/SnpEffAnnotator$Impact.class */
    /**
     * Impact categories. The constant names are the same four values that the description of the
     * {@code Putative_impact} output attribute lists.
     * NOTE(review): the declaration order looks like ascending severity; confirm before relying on
     * {@code ordinal()} or {@code compareTo} for ranking.
     */
    public enum Impact {
        MODIFIER,
        LOW,
        MODERATE,
        HIGH
    }

    /* loaded from: input_file:org/molgenis/data/annotation/entity/impl/SnpEffAnnotator$SnpEffRepositoryAnnotator.class */
    public static class SnpEffRepositoryAnnotator extends AbstractRepositoryAnnotator {
        private static final String CHARSET = "UTF-8";
        private String snpEffPath;
        private final Entity pluginSettings;
        private final AnnotatorInfo info = AnnotatorInfo.create(AnnotatorInfo.Status.READY, AnnotatorInfo.Type.EFFECT_PREDICTION, SnpEffAnnotator.NAME, "Genetic variant annotation and effect prediction toolbox. It annotates and predicts the effects of variants on genes (such as amino acid changes). ", getOutputMetaData());
        private final JarRunner jarRunner;

        /**
         * Creates an annotator bound to the given settings and jar runner.
         *
         * @param entity settings entity; kept as the plugin settings from which the SnpEff jar
         *     location is later read
         * @param jarRunner runner used to execute the SnpEff jar
         */
        public SnpEffRepositoryAnnotator(Entity entity, JarRunner jarRunner) {
            this.jarRunner = jarRunner;
            this.pluginSettings = entity;
        }

        /**
         * Returns the annotator descriptor that was created together with this instance.
         *
         * @return the {@link AnnotatorInfo} stored in the {@code info} field
         */
        @Override // org.molgenis.data.annotation.RepositoryAnnotator
        public AnnotatorInfo getInfo() {
            return this.info;
        }

        /**
         * Annotates the given entities with SnpEff.
         *
         * <p>The entities are first written to a temporary VCF file, which is then handed to
         * {@link #annotateRepository(Iterable, File)} together with the entities themselves.
         *
         * @param iterable entities to annotate
         * @return iterator over the annotated entities
         * @throws MolgenisDataException if the temporary input file cannot be written
         */
        @Override
        public Iterator<Entity> annotate(Iterable<Entity> iterable) {
            final File inputVcf;
            try {
                inputVcf = getInputVcfTempFile(iterable);
            } catch (IOException ioFailure) {
                throw new MolgenisDataException("Exception running SnpEff", ioFailure);
            }
            // annotateRepository declares no checked exceptions, so it does not need the try block.
            return annotateRepository(iterable, inputVcf);
        }

        /**
         * Runs SnpEff on the given input VCF file and lazily merges its output into the entities.
         *
         * <p>If {@code iterable} yields no entities, SnpEff is not started and an empty iterator is
         * returned. Otherwise the jar is executed once, and every call to {@code next()} on the
         * returned iterator takes the next source entity, copies it into a new {@link MapEntity}
         * whose metadata is extended with this annotator's output attributes, and fills it from the
         * next non-header line of the SnpEff output.
         *
         * <p>The output reader is closed when {@code hasNext()} reports that the source entities are
         * exhausted, or when reading a line fails. A caller that abandons the iterator early leaves
         * the reader open.
         *
         * @param iterable entities to annotate, in the same order as the lines of {@code file}
         * @param file VCF file passed to SnpEff as input
         * @return iterator over the annotated copies of the entities
         * @throws UncheckedIOException if running the jar, opening or reading its output fails with an
         *     {@link IOException}
         * @throws MolgenisDataException if the thread is interrupted while SnpEff runs; the interrupt
         *     flag is restored before throwing
         */
        public Iterator<Entity> annotateRepository(Iterable<Entity> iterable, File file) {
            final Iterator<Entity> it = iterable.iterator();
            if (!it.hasNext()) {
                return Collections.emptyIterator();
            }

            final BufferedReader bufferedReader;
            try {
                bufferedReader = new BufferedReader(new InputStreamReader(
                        new FileInputStream(this.jarRunner.runJar(SnpEffAnnotator.NAME, Arrays.asList("-Xmx2g", getSnpEffPath(), "hg19", "-noStats", "-noLog", "-lof", "-canon", "-ud", "0", "-spliceSiteSize", "5"), file).getAbsolutePath()),
                        CHARSET));
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            } catch (InterruptedException e2) {
                // Restore the interrupt flag so callers further up the stack can still observe it.
                Thread.currentThread().interrupt();
                throw new MolgenisDataException("Exception running SnpEff", e2);
            }

            return new Iterator<Entity>() {
                @Override
                public boolean hasNext() {
                    boolean hasNext = it.hasNext();
                    if (!hasNext) {
                        // All source entities consumed: the SnpEff output is no longer needed.
                        IOUtils.closeQuietly(bufferedReader);
                    }
                    return hasNext;
                }

                @Override
                public Entity next() {
                    Entity entity = it.next();
                    DefaultEntityMetaData annotatedMetaData = new DefaultEntityMetaData(entity.getEntityMetaData());
                    List<AttributeMetaData> outputAttributes = SnpEffRepositoryAnnotator.this.info.getOutputAttributes();
                    outputAttributes.forEach(attributeMetaData -> {
                        annotatedMetaData.addAttributeMetaData(attributeMetaData, new EntityMetaData.AttributeRole[0]);
                    });
                    Entity mapEntity = new MapEntity(entity, annotatedMetaData);
                    try {
                        SnpEffRepositoryAnnotator.this.parseOutputLineToEntity(SnpEffRepositoryAnnotator.this.readLine(bufferedReader), mapEntity);
                        return mapEntity;
                    } catch (IOException e) {
                        // Iteration cannot continue after a read failure, so release the reader now.
                        IOUtils.closeQuietly(bufferedReader);
                        throw new UncheckedIOException(e);
                    }
                }
            };
        }

        /* JADX INFO: Access modifiers changed from: private */
        /**
         * Reads the next line that is not a header line.
         *
         * <p>Lines starting with {@code #} are skipped.
         *
         * @param bufferedReader reader to pull lines from
         * @return the first line that does not start with {@code #}, or {@code null} when the reader
         *     is exhausted
         * @throws IOException if reading from the reader fails
         */
        public String readLine(BufferedReader bufferedReader) throws IOException {
            String line;
            do {
                line = bufferedReader.readLine();
            } while (line != null && line.startsWith("#"));
            return line;
        }

        /**
         * Writes the given entities to a new temporary file that serves as SnpEff input.
         *
         * <p>Each entity becomes one tab-separated line holding its {@code #CHROM}, {@code POS},
         * a literal {@code .}, {@code REF} and {@code ALT} values. The file is created with prefix
         * {@code snpEff} and suffix {@code .vcf} and is written as UTF-8.
         *
         * <p>If writing fails, the partially written temporary file is deleted before the exception
         * is propagated, because the caller never receives a handle to clean it up.
         *
         * @param iterable entities to write
         * @return the temporary file containing one line per entity
         * @throws IOException if the file cannot be created or written
         */
        public File getInputVcfTempFile(Iterable<Entity> iterable) throws IOException {
            File vcfFile = File.createTempFile(SnpEffAnnotator.NAME, ".vcf");
            // try-with-resources closes the writer on every path and records a failing close() as a
            // suppressed exception instead of letting it replace the original failure.
            try (BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(vcfFile), CHARSET))) {
                for (Entity entity : iterable) {
                    bufferedWriter.write(entity.getString("#CHROM") + "\t" + entity.getString("POS") + "\t.\t" + entity.getString("REF") + "\t" + entity.getString("ALT") + "\n");
                }
            } catch (IOException | RuntimeException e) {
                // The writer is already closed here; do not leave an orphaned temp file behind.
                if (!vcfFile.delete()) {
                    SnpEffAnnotator.LOG.warn("Could not delete temporary file {}", vcfFile.getAbsolutePath());
                }
                throw e;
            }
            return vcfFile;
        }

        /**
         * Copies the SnpEff annotation found in one output line onto the given entity.
         *
         * <p>The eighth tab-separated column is split on {@code ;}. Its first part is split on
         * {@code |} into the annotation fields; fields 1 to 15 are stored under the corresponding
         * output attribute names. The second and third parts are inspected for {@code LOF=} and
         * {@code NMD=} entries, whose values (prefix removed) are stored as well; when absent, empty
         * strings are stored.
         *
         * <p>If the line has no eighth column or fewer than 16 annotation fields, nothing is set and
         * a "No results" message is logged.
         *
         * @param str one data line of SnpEff output; {@code null} is ignored
         * @param entity entity that receives the annotation values; also used for the log message
         */
        public void parseOutputLineToEntity(String str, Entity entity) {
            if (str == null) {
                return;
            }

            // Column index 7 is the eighth tab-separated column; guard against short lines.
            String[] columns = str.split("\t");
            String[] infoParts = columns.length > 7 ? columns[7].split(";") : new String[0];
            // split(";") yields an empty array for input such as ";", so index 0 must be guarded too.
            String[] annotationFields = infoParts.length > 0 ? infoParts[0].split(Pattern.quote("|"), -1) : new String[0];

            // Index 15 is read below, so at least 16 fields are required.
            if (annotationFields.length < 16) {
                SnpEffAnnotator.LOG.info("No results for CHROM:{} POS:{} REF:{} ALT:{} ", new Object[]{entity.getString("#CHROM"), entity.getString("POS"), entity.getString("REF"), entity.getString("ALT")});
                return;
            }

            // Only the two parts directly after the annotation are inspected for LOF / NMD.
            String lof = "";
            String nmd = "";
            for (int i = 1; i < infoParts.length && i <= 2; i++) {
                if (infoParts[i].startsWith("LOF=")) {
                    lof = infoParts[i];
                } else if (infoParts[i].startsWith("NMD=")) {
                    nmd = infoParts[i];
                }
            }

            entity.set(SnpEffAnnotator.ANNOTATION, annotationFields[1]);
            entity.set(SnpEffAnnotator.PUTATIVE_IMPACT, annotationFields[2]);
            entity.set(SnpEffAnnotator.GENE_NAME, annotationFields[3]);
            entity.set(SnpEffAnnotator.GENE_ID, annotationFields[4]);
            entity.set(SnpEffAnnotator.FEATURE_TYPE, annotationFields[5]);
            entity.set(SnpEffAnnotator.FEATURE_ID, annotationFields[6]);
            entity.set(SnpEffAnnotator.TRANSCRIPT_BIOTYPE, annotationFields[7]);
            entity.set(SnpEffAnnotator.RANK_TOTAL, annotationFields[8]);
            entity.set(SnpEffAnnotator.HGVS_C, annotationFields[9]);
            entity.set(SnpEffAnnotator.HGVS_P, annotationFields[10]);
            entity.set(SnpEffAnnotator.C_DNA_POSITION, annotationFields[11]);
            entity.set(SnpEffAnnotator.CDS_POSITION, annotationFields[12]);
            entity.set(SnpEffAnnotator.PROTEIN_POSITION, annotationFields[13]);
            entity.set(SnpEffAnnotator.DISTANCE_TO_FEATURE, annotationFields[14]);
            entity.set(SnpEffAnnotator.ERRORS, annotationFields[15]);
            entity.set(SnpEffAnnotator.LOF, lof.replace("LOF=", ""));
            entity.set(SnpEffAnnotator.NMD, nmd.replace("NMD=", ""));
        }

        /**
         * Delegates unchanged to the superclass implementation; no SnpEff-specific checks are added.
         *
         * @param entityMetaData metadata of the entity type that should be annotated
         * @return whatever the superclass implementation returns for this metadata
         */
        @Override // org.molgenis.data.annotation.AbstractRepositoryAnnotator, org.molgenis.data.annotation.RepositoryAnnotator
        public String canAnnotate(EntityMetaData entityMetaData) {
            return super.canAnnotate(entityMetaData);
        }

        /**
         * Describes the attributes this annotator adds to an entity.
         *
         * <p>All seventeen SnpEff attributes are wrapped as parts of a single compound attribute that
         * is named after {@code getFullName()} and labelled with {@code getSimpleName()}.
         *
         * @return a single-element list holding the compound attribute
         */
        @Override
        public List<AttributeMetaData> getOutputMetaData() {
            List<AttributeMetaData> parts = new ArrayList<>();
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.ANNOTATION, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "Annotated using Sequence Ontology terms. Multiple effects can be concatenated using ‘&’ (source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.PUTATIVE_IMPACT, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    " A simple estimation of putative impact / deleteriousness : {HIGH, MODERATE, LOW, MODIFIER}(source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.GENE_NAME, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "Common gene name (HGNC). Optional: use closest gene when the variant is “intergenic”(source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.GENE_ID, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "Gene ID"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.FEATURE_TYPE, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "Which type of feature is in the next field (e.g. transcript, motif, miRNA, etc.). It is preferred to use Sequence Ontology (SO) terms, but ‘custom’ (user defined) are allowed. ANN=A|stop_gained|HIGH|||transcript|... Tissue specific features may include cell type / tissue information separated by semicolon e.g.: ANN=A|histone_binding_site|LOW|||H3K4me3:HeLa-S3|...\nFeature ID: Depending on the annotation, this may be: Transcript ID (preferably using version number), Motif ID, miRNA, ChipSeq peak, Histone mark, etc. Note: Some features may not have ID (e.g. histone marks from custom Chip-Seq experiments may not have a unique ID). (source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.FEATURE_ID, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "Depending on the annotation, this may be: Transcript ID (preferably using version number), Motif ID, miRNA, ChipSeq peak, Histone mark, etc. Note: Some features may not have ID (e.g. histone marks from custom Chip-Seq experiments may not have a unique ID).(source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.TRANSCRIPT_BIOTYPE, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "The bare minimum is at least a description on whether the transcript is {“Coding”, “Noncoding”}. Whenever possible, use ENSEMBL biotypes.(source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.RANK_TOTAL, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "Exon or Intron rank / total number of exons or introns(source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.HGVS_C, MolgenisFieldTypes.FieldTypeEnum.TEXT,
                    "Variant using HGVS notation (DNA level)(source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.HGVS_P, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "If variant is coding, this field describes the variant using HGVS notation (Protein level). Since transcript ID is already mentioned in ‘feature ID’, it may be omitted here.(source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.C_DNA_POSITION, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "Position in cDNA and trancript’s cDNA length (one based)(source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.CDS_POSITION, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "Position and number of coding bases (one based includes START and STOP codons)(source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.PROTEIN_POSITION, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "Position and number of AA (one based, including START, but not STOP)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.DISTANCE_TO_FEATURE, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "All items in this field are options, so the field could be empty. Up/Downstream: Distance to first / last codon Intergenic: Distance to closest gene Distance to closest Intron boundary in exon (+/- up/downstream). If same, use positive number. Distance to closest exon boundary in Intron (+/- up/downstream) Distance to first base in MOTIF Distance to first base in miRNA Distance to exon-intron boundary in splice_site or splice _region ChipSeq peak: Distance to summit (or peak center) Histone mark / Histone state: Distance to summit (or peak center)(source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.ERRORS, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "Add errors, warnings oErrors, Warnings or Information messages: Add errors, warnings or r informative message that can affect annotation accuracy. It can be added using either ‘codes’ (as shown in column 1, e.g. W1) or ‘message types’ (as shown in column 2, e.g. WARNING_REF_DOES_NOT_MATCH_GENOME). All these errors, warnings or information messages messages are optional.(source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.LOF, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "snpEff can estimate if a variant is deemed to have a loss of function on the protein.(source:http://snpeff.sourceforge.net)"));
            parts.add(describedSnpEffAttribute(SnpEffAnnotator.NMD, MolgenisFieldTypes.FieldTypeEnum.STRING,
                    "Nonsense mediate decay assessment. Some mutations may cause mRNA to be degraded thus not translated into a protein. NMD analysis marks mutations that are estimated to trigger nonsense mediated decay.(source:http://snpeff.sourceforge.net)"));

            DefaultAttributeMetaData compound = new DefaultAttributeMetaData(getFullName(), MolgenisFieldTypes.FieldTypeEnum.COMPOUND);
            compound.setLabel(getSimpleName());
            for (AttributeMetaData part : parts) {
                compound.addAttributePart(part);
            }
            return Collections.singletonList(compound);
        }

        /** Builds one output attribute with the given name, data type and description. */
        private static DefaultAttributeMetaData describedSnpEffAttribute(String name, MolgenisFieldTypes.FieldTypeEnum dataType, String description) {
            DefaultAttributeMetaData attribute = new DefaultAttributeMetaData(name, dataType);
            attribute.setDescription(description);
            return attribute;
        }

        /**
         * Lists the attributes an entity must provide to be annotated: the VCF chromosome, position,
         * reference and alternative allele attributes, in that order.
         *
         * @return a new mutable list with the four required attributes
         */
        @Override
        public List<AttributeMetaData> getRequiredAttributes() {
            List<AttributeMetaData> required = new ArrayList<>();
            Collections.addAll(required,
                    VcfRepository.CHROM_META,
                    VcfRepository.POS_META,
                    VcfRepository.REF_META,
                    VcfRepository.ALT_META);
            return required;
        }

        /**
         * Returns the short name of this annotator.
         *
         * @return the constant {@link SnpEffAnnotator#NAME}, i.e. {@code "snpEff"}
         */
        @Override // org.molgenis.data.annotation.RepositoryAnnotator
        public String getSimpleName() {
            return SnpEffAnnotator.NAME;
        }

        /**
         * Reports whether a usable SnpEff jar location is available.
         *
         * @return {@code true} when {@code getSnpEffPath()} yields a non-null path
         */
        @Override // org.molgenis.data.annotation.RepositoryAnnotator
        public boolean annotationDataExists() {
            return getSnpEffPath() != null;
        }

        /**
         * Resolves the location of the SnpEff jar on first use.
         *
         * <p>An already resolved path is returned as is. Otherwise, when plugin settings are present,
         * the jar location setting is read as the system user and kept only if it points to an
         * existing regular file. A failed lookup leaves the cached path {@code null}, so a later call
         * tries again.
         *
         * @return the path of the SnpEff jar, or {@code null} when there are no settings, the setting
         *     is empty, or the file does not exist
         */
        private String getSnpEffPath() {
            if (this.pluginSettings == null || this.snpEffPath != null) {
                return this.snpEffPath;
            }

            // Block-bodied lambda on purpose: it can only match a value-returning overload.
            this.snpEffPath = (String) RunAsSystemProxy.runAsSystem(() -> {
                return this.pluginSettings.getString(SnpEffAnnotatorSettings.Meta.SNPEFF_JAR_LOCATION);
            });
            if (this.snpEffPath == null) {
                return null;
            }

            File jarFile = new File(this.snpEffPath);
            boolean usable = jarFile.exists() && jarFile.isFile();
            if (!usable) {
                SnpEffAnnotator.LOG.debug("SnpEff not found at: " + jarFile.getAbsolutePath());
                this.snpEffPath = null;
            } else {
                SnpEffAnnotator.LOG.info("SnpEff found at: " + jarFile.getAbsolutePath());
            }
            return this.snpEffPath;
        }

        /**
         * Creates the command-line settings configurer for this annotator.
         *
         * @return a new {@link SingleFileLocationCmdLineAnnotatorSettingsConfigurer} constructed with
         *     the SnpEff jar location setting name and this annotator's plugin settings
         */
        @Override // org.molgenis.data.annotation.RepositoryAnnotator
        public CmdLineAnnotatorSettingsConfigurer getCmdLineAnnotatorSettingsConfigurer() {
            return new SingleFileLocationCmdLineAnnotatorSettingsConfigurer(SnpEffAnnotatorSettings.Meta.SNPEFF_JAR_LOCATION, this.pluginSettings);
        }
    }

    /**
     * Bean factory method for the SnpEff annotator.
     *
     * @return a new {@link SnpEffRepositoryAnnotator} built from the autowired settings entity and
     *     jar runner of this configuration
     */
    @Bean
    public RepositoryAnnotator snpEff() {
        return new SnpEffRepositoryAnnotator(this.snpEffAnnotatorSettings, this.jarRunner);
    }

    /**
     * Bean factory method for the jar runner.
     *
     * @return a new {@link JarRunnerImpl}
     */
    @Bean
    JarRunner jarRunner() {
        return new JarRunnerImpl();
    }

    /**
     * Looks up the gene name of an annotated entity.
     *
     * <p>The {@code Gene_Name} attribute is used when it is set. Otherwise the {@code ANN}
     * attribute is split on {@code |}; if that yields more than ten fields, the fourth field is
     * used when it is non-empty, and a message is logged when it is empty.
     *
     * @param entity entity to read the gene name from
     * @return the gene name, or {@code null} when none could be determined
     */
    public static String getGeneNameFromEntity(Entity entity) {
        String geneName = entity.getString("Gene_Name");
        if (geneName != null) {
            return geneName;
        }

        String annotation = entity.getString("ANN");
        if (annotation == null) {
            return null;
        }

        String[] annotationFields = annotation.split("\\|", -1);
        if (annotationFields.length <= 10) {
            return null;
        }

        String geneSymbol = annotationFields[3];
        if (geneSymbol.length() == 0) {
            LOG.info("No gene symbol in ANN field for " + entity.toString());
            return null;
        }
        return geneSymbol;
    }
}
