package bio.ferlab.datalake.spark3.publictables.enriched;

import bio.ferlab.datalake.commons.config.Coalesce;
import bio.ferlab.datalake.commons.config.Coalesce$;
import bio.ferlab.datalake.commons.config.Configuration;
import bio.ferlab.datalake.commons.config.DatasetConf;
import bio.ferlab.datalake.spark3.etl.ETLSingleDestination;
import bio.ferlab.datalake.spark3.implicits.DatasetConfImplicits$;
import bio.ferlab.datalake.spark3.implicits.SparkUtils$;
import java.time.LocalDateTime;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import scala.Function1;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.immutable.$colon;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.reflect.ScalaSignature;

/* compiled from: Genes.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005Ud\u0001B\r\u001b\u0001\u001dB\u0011B\f\u0001\u0003\u0002\u0003\u0006YaL\u001c\t\u000bq\u0002A\u0011A\u001f\t\u000f\t\u0003!\u0019!C\u0001\u0007\"1q\t\u0001Q\u0001\n\u0011Cq\u0001\u0013\u0001C\u0002\u0013\u00051\t\u0003\u0004J\u0001\u0001\u0006I\u0001\u0012\u0005\b\u0015\u0002\u0011\r\u0011\"\u0001D\u0011\u0019Y\u0005\u0001)A\u0005\t\"9A\n\u0001b\u0001\n\u0003\u0019\u0005BB'\u0001A\u0003%A\tC\u0004O\u0001\t\u0007I\u0011A\"\t\r=\u0003\u0001\u0015!\u0003E\u0011\u001d\u0001\u0006A1A\u0005\u0002\rCa!\u0015\u0001!\u0002\u0013!\u0005b\u0002*\u0001\u0005\u0004%\ta\u0011\u0005\u0007'\u0002\u0001\u000b\u0011\u0002#\t\u000bQ\u0003A\u0011I+\t\u000f\u0005}\u0001\u0001\"\u0011\u0002\"\u00191\u0011q\u0006\u0001\u0002\u0003cA\u0011\"a\u000f\u0014\u0005\u0003\u0005\u000b\u0011B4\t\rq\u001aB\u0011AA\u001f\u0011\u001d\t)e\u0005C\u0001\u0003\u000fB\u0011\"!\u001a\u0001\u0003\u0003%\u0019!a\u001a\t\u000f\u0005-\u0004\u0001\"\u0011\u0002n\t)q)\u001a8fg*\u00111\u0004H\u0001\tK:\u0014\u0018n\u00195fI*\u0011QDH\u0001\raV\u0014G.[2uC\ndWm\u001d\u0006\u0003?\u0001\naa\u001d9be.\u001c$BA\u0011#\u0003!!\u0017\r^1mC.,'BA\u0012%\u0003\u00191WM\u001d7bE*\tQ%A\u0002cS>\u001c\u0001a\u0005\u0002\u0001QA\u0011\u0011\u0006L\u0007\u0002U)\u00111FH\u0001\u0004KRd\u0017BA\u0017+\u0005Q)E\u000bT*j]\u001edW\rR3ti&t\u0017\r^5p]\u0006!1m\u001c8g!\t\u0001T'D\u00012\u0015\t\u00114'\u0001\u0004d_:4\u0017n\u001a\u0006\u0003i\u0001\nqaY8n[>t7/\u0003\u00027c\ti1i\u001c8gS\u001e,(/\u0019;j_:L!A\f\u001d\n\u0005eR$aA#U\u0019*\u00111HK\u0001\u0003mJ\na\u0001P5oSRtD#\u0001 
\u0015\u0005}\n\u0005C\u0001!\u0001\u001b\u0005Q\u0002\"\u0002\u0018\u0003\u0001\by\u0013aD7bS:$Um\u001d;j]\u0006$\u0018n\u001c8\u0016\u0003\u0011\u0003\"\u0001M#\n\u0005\u0019\u000b$a\u0003#bi\u0006\u001cX\r^\"p]\u001a\f\u0001#\\1j]\u0012+7\u000f^5oCRLwN\u001c\u0011\u0002\u001b=l\u0017.\\0hK:,wl]3u\u00039yW.[7`O\u0016tWmX:fi\u0002\n\u0011c\u001c:qQ\u0006tW\r^0hK:,wl]3u\u0003Iy'\u000f\u001d5b]\u0016$xlZ3oK~\u001bX\r\u001e\u0011\u0002\u0019!\u0004xnX4f]\u0016|6/\u001a;\u0002\u001b!\u0004xnX4f]\u0016|6/\u001a;!\u0003-AW/\\1o?\u001e,g.Z:\u0002\u0019!,X.\u00198`O\u0016tWm\u001d\u0011\u0002\u0019\u0011$GmX4f]\u0016|6/\u001a;\u0002\u001b\u0011$GmX4f]\u0016|6/\u001a;!\u0003=\u0019wn]7jG~;WM\\3`g\u0016$\u0018\u0001E2pg6L7mX4f]\u0016|6/\u001a;!\u0003\u001d)\u0007\u0010\u001e:bGR$RAVA\u0004\u00037!\"a\u0016@\u0011\ta\u000bGm\u001a\b\u00033~\u0003\"AW/\u000e\u0003mS!\u0001\u0018\u0014\u0002\rq\u0012xn\u001c;?\u0015\u0005q\u0016!B:dC2\f\u0017B\u00011^\u0003\u0019\u0001&/\u001a3fM&\u0011!m\u0019\u0002\u0004\u001b\u0006\u0004(B\u00011^!\tAV-\u0003\u0002gG\n11\u000b\u001e:j]\u001e\u0004\"\u0001[>\u000f\u0005%DhB\u00016v\u001d\tY'O\u0004\u0002m_:\u0011!,\\\u0005\u0002]\u0006\u0019qN]4\n\u0005A\f\u0018AB1qC\u000eDWMC\u0001o\u0013\t\u0019H/A\u0003ta\u0006\u00148N\u0003\u0002qc&\u0011ao^\u0001\u0004gFd'BA:u\u0013\tI(0A\u0004qC\u000e\\\u0017mZ3\u000b\u0005Y<\u0018B\u0001?~\u0005%!\u0015\r^1Ge\u0006lWM\u0003\u0002zu\")1/\u0005a\u0002\u007fB!\u0011\u0011AA\u0002\u001b\u0005Q\u0018bAA\u0003u\na1\u000b]1sWN+7o]5p]\"I\u0011\u0011B\t\u0011\u0002\u0003\u0007\u00111B\u0001\u0010Y\u0006\u001cHOU;o\t\u0006$X\rV5nKB!\u0011QBA\f\u001b\t\tyA\u0003\u0003\u0002\u0012\u0005M\u0011\u0001\u0002;j[\u0016T!!!\u0006\u0002\t)\fg/Y\u0005\u0005\u00033\tyAA\u0007M_\u000e\fG\u000eR1uKRKW.\u001a\u0005\n\u0003;\t\u0002\u0013!a\u0001\u0003\u0017\t!cY;se\u0016tGOU;o\t\u0006$X\rV5nK\u0006yAO]1og\u001a|'/\\*j]\u001edW\r\u0006\u0005\u0002$\u0005\u001d\u00121FA\u0017)\r9\u0017Q\u0005\u0005\u0006gJ\u0001\u001da 
\u0005\u0007\u0003S\u0011\u0002\u0019A,\u0002\t\u0011\fG/\u0019\u0005\n\u0003\u0013\u0011\u0002\u0013!a\u0001\u0003\u0017A\u0011\"!\b\u0013!\u0003\u0005\r!a\u0003\u0003\u0019\u0011\u000bG/\u0019$sC6,w\n]:\u0014\u0007M\t\u0019\u0004\u0005\u0003\u00026\u0005]R\"A/\n\u0007\u0005eRL\u0001\u0004B]f\u0014VMZ\u0001\u0003I\u001a$B!a\u0010\u0002DA\u0019\u0011\u0011I\n\u000e\u0003\u0001Aa!a\u000f\u0016\u0001\u00049\u0017\u0001\u00056pS:\fe\u000eZ'fe\u001e,w+\u001b;i)\u001d9\u0017\u0011JA'\u0003CBa!a\u0013\u0017\u0001\u00049\u0017\u0001C4f]\u0016|6/\u001a;\t\u000f\u0005=c\u00031\u0001\u0002R\u00051!n\\5o\u001f:\u0004R!a\u0015\u0002\\\u0011tA!!\u0016\u0002Z9\u0019!,a\u0016\n\u0003yK!!_/\n\t\u0005u\u0013q\f\u0002\u0004'\u0016\f(BA=^\u0011\u0019\t\u0019G\u0006a\u0001I\u0006a\u0011m]\"pYVlgNT1nK\u0006aA)\u0019;b\rJ\fW.Z(qgR!\u0011qHA5\u0011\u0019\tYd\u0006a\u0001O\u0006\u0011B-\u001a4bk2$(+\u001a9beRLG/[8o+\t\ty\u0007\u0005\u0004\u00026\u0005EtmZ\u0005\u0004\u0003gj&!\u0003$v]\u000e$\u0018n\u001c82\u0001")
/* loaded from: input_file:bio/ferlab/datalake/spark3/publictables/enriched/Genes.class */
/*
 * ETL job that builds the "enriched_genes" dataset: it starts from the
 * "normalized_human_genes" dataset and attaches, per gene symbol, the matching
 * rows of the Orphanet, HPO, OMIM, DDD and COSMIC gene sets as nested columns.
 */
public class Genes extends ETLSingleDestination {
    /** Destination dataset ("enriched_genes"). */
    private final DatasetConf mainDestination;
    /** Source dataset "normalized_omim_gene_set". */
    private final DatasetConf omim_gene_set;
    /** Source dataset "normalized_orphanet_gene_set". */
    private final DatasetConf orphanet_gene_set;
    /** Source dataset "normalized_hpo_gene_set". */
    private final DatasetConf hpo_gene_set;
    /** Source dataset "normalized_human_genes". */
    private final DatasetConf human_genes;
    /** Source dataset "normalized_ddd_gene_set". */
    private final DatasetConf ddd_gene_set;
    /** Source dataset "normalized_cosmic_gene_set". */
    private final DatasetConf cosmic_gene_set;

    /* compiled from: Genes.scala */
    /* loaded from: input_file:bio/ferlab/datalake/spark3/publictables/enriched/Genes$DataFrameOps.class */
    /**
     * Wrapper around a dataset that adds the {@link #joinAndMergeWith} operation.
     */
    public class DataFrameOps {
        /** The wrapped (left-hand) dataset. */
        private final Dataset<Row> df;
        /** Reference to the enclosing {@link Genes} instance. */
        public final /* synthetic */ Genes $outer;

        /**
         * Left-joins the wrapped dataset with {@code dataset} and nests the joined rows.
         *
         * <p>The join result is grouped by {@code symbol}. For every group the first
         * struct of the wrapped dataset's columns is kept and expanded back into
         * top-level columns, while the columns of {@code dataset} (without the join
         * columns) are collected as a list of structs into the column {@code str}.
         * That column is finally passed through {@code SparkUtils.removeEmptyObjectsIn}.
         *
         * @param dataset the right-hand dataset to attach
         * @param seq     the names of the columns to join on
         * @param str     the name of the output column holding the collected structs
         * @return the wrapped dataset's columns plus the column {@code str}
         */
        public Dataset<Row> joinAndMergeWith(Dataset<Row> dataset, Seq<String> seq, String str) {
            Dataset<Row> aggregated = this.df
                    .join(dataset, seq, "left")
                    .groupBy("symbol", Predef$.MODULE$.wrapRefArray(new String[0]))
                    .agg(
                            functions$.MODULE$.first(allColumnsAsStruct(this.df)).as("hg"),
                            Predef$.MODULE$.wrapRefArray(new Column[]{
                                    functions$.MODULE$.collect_list(allColumnsAsStruct(dataset.drop(seq))).as(str)
                            }));

            // Expand the "hg" struct back into top-level columns next to the collected list.
            Column[] outputColumns = {functions$.MODULE$.col("hg.*"), functions$.MODULE$.col(str)};
            Dataset<Row> flattened = aggregated.select(Predef$.MODULE$.wrapRefArray(outputColumns));

            Column cleaned = (Column) SparkUtils$.MODULE$.removeEmptyObjectsIn().apply(str);
            return flattened.withColumn(str, cleaned);
        }

        /** Builds a single struct column out of every column ({@code "*"}) of {@code source}. */
        private Column allColumnsAsStruct(Dataset<Row> source) {
            return functions$.MODULE$.struct(Predef$.MODULE$.wrapRefArray(new Column[]{source.apply("*")}));
        }

        /** Returns the enclosing {@link Genes} instance. */
        public /* synthetic */ Genes bio$ferlab$datalake$spark3$publictables$enriched$Genes$DataFrameOps$$$outer() {
            return $outer;
        }

        /**
         * @param genes   the enclosing instance; a {@code null} value raises a
         *                {@link NullPointerException}
         * @param dataset the dataset to wrap
         */
        public DataFrameOps(Genes genes, Dataset<Row> dataset) {
            this.df = dataset;
            if (genes == null) {
                // "throw null" raises a NullPointerException at runtime.
                throw null;
            }
            this.$outer = genes;
        }
    }

    /** Returns the destination dataset configuration ("enriched_genes"). */
    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public DatasetConf mainDestination() {
        return mainDestination;
    }

    /** Returns the configuration of the "normalized_omim_gene_set" dataset. */
    public DatasetConf omim_gene_set() {
        return omim_gene_set;
    }

    /** Returns the configuration of the "normalized_orphanet_gene_set" dataset. */
    public DatasetConf orphanet_gene_set() {
        return orphanet_gene_set;
    }

    /** Returns the configuration of the "normalized_hpo_gene_set" dataset. */
    public DatasetConf hpo_gene_set() {
        return hpo_gene_set;
    }

    /** Returns the configuration of the "normalized_human_genes" dataset. */
    public DatasetConf human_genes() {
        return human_genes;
    }

    /** Returns the configuration of the "normalized_ddd_gene_set" dataset. */
    public DatasetConf ddd_gene_set() {
        return ddd_gene_set;
    }

    /** Returns the configuration of the "normalized_cosmic_gene_set" dataset. */
    public DatasetConf cosmic_gene_set() {
        return cosmic_gene_set;
    }

    /**
     * Reads the six source datasets and returns them keyed by their dataset id.
     *
     * @param localDateTime  not used by this implementation
     * @param localDateTime2 not used by this implementation
     * @param sparkSession   the session used to read the datasets
     * @return a map from dataset id to the dataset that was read
     */
    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public Map<String, Dataset<Row>> extract(LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession) {
        Tuple2<String, Dataset<Row>> omimEntry = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(
                Predef$.MODULE$.ArrowAssoc(omim_gene_set().id()),
                DatasetConfImplicits$.MODULE$.DatasetConfOperations(omim_gene_set()).read(super.conf(), sparkSession));
        Tuple2<String, Dataset<Row>> orphanetEntry = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(
                Predef$.MODULE$.ArrowAssoc(orphanet_gene_set().id()),
                DatasetConfImplicits$.MODULE$.DatasetConfOperations(orphanet_gene_set()).read(super.conf(), sparkSession));
        Tuple2<String, Dataset<Row>> hpoEntry = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(
                Predef$.MODULE$.ArrowAssoc(hpo_gene_set().id()),
                DatasetConfImplicits$.MODULE$.DatasetConfOperations(hpo_gene_set()).read(super.conf(), sparkSession));
        Tuple2<String, Dataset<Row>> humanGenesEntry = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(
                Predef$.MODULE$.ArrowAssoc(human_genes().id()),
                DatasetConfImplicits$.MODULE$.DatasetConfOperations(human_genes()).read(super.conf(), sparkSession));
        Tuple2<String, Dataset<Row>> dddEntry = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(
                Predef$.MODULE$.ArrowAssoc(ddd_gene_set().id()),
                DatasetConfImplicits$.MODULE$.DatasetConfOperations(ddd_gene_set()).read(super.conf(), sparkSession));
        Tuple2<String, Dataset<Row>> cosmicEntry = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(
                Predef$.MODULE$.ArrowAssoc(cosmic_gene_set().id()),
                DatasetConfImplicits$.MODULE$.DatasetConfOperations(cosmic_gene_set()).read(super.conf(), sparkSession));

        return Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{
                omimEntry, orphanetEntry, hpoEntry, humanGenesEntry, dddEntry, cosmicEntry
        }));
    }

    /**
     * Builds the enriched genes dataset from the extracted sources.
     *
     * <p>The human genes projection is the base; the Orphanet, HPO, OMIM, DDD and
     * COSMIC projections are attached to it in that order through
     * {@link DataFrameOps#joinAndMergeWith}, producing the nested columns
     * {@code orphanet}, {@code hpo}, {@code omim}, {@code ddd} and {@code cosmic}.
     *
     * @param map            the extracted datasets keyed by dataset id
     * @param localDateTime  not used by this implementation
     * @param localDateTime2 not used by this implementation
     * @param sparkSession   the session whose implicits are used to build columns
     * @return the enriched genes dataset
     */
    @Override // bio.ferlab.datalake.spark3.etl.ETLSingleDestination
    public Dataset<Row> transformSingle(Map<String, Dataset<Row>> map, LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession) {
        Dataset<Row> humanGenes = selectHumanGenes(map, sparkSession);
        Dataset<Row> orphanet = selectOrphanet(map, sparkSession);
        Dataset<Row> omim = selectOmim(map, sparkSession);
        Dataset<Row> hpo = selectHpo(map, sparkSession);

        Dataset<Row> withOrphanet = DataFrameOps(humanGenes).joinAndMergeWith(orphanet, singleKey("symbol"), "orphanet");
        Dataset<Row> withHpo = DataFrameOps(withOrphanet).joinAndMergeWith(hpo, singleKey("entrez_gene_id"), "hpo");
        Dataset<Row> withOmim = DataFrameOps(withHpo).joinAndMergeWith(omim, singleKey("omim_gene_id"), "omim");

        // The DDD and COSMIC inputs are looked up only once the previous joins are built.
        DataFrameOps omimMerged = DataFrameOps(withOmim);
        Dataset<Row> ddd = ((Dataset<Row>) map.apply(ddd_gene_set().id()))
                .select("disease_name", Predef$.MODULE$.wrapRefArray(new String[]{"symbol"}));
        Dataset<Row> withDdd = omimMerged.joinAndMergeWith(ddd, singleKey("symbol"), "ddd");

        DataFrameOps dddMerged = DataFrameOps(withDdd);
        Dataset<Row> cosmic = ((Dataset<Row>) map.apply(cosmic_gene_set().id()))
                .select("symbol", Predef$.MODULE$.wrapRefArray(new String[]{"tumour_types_germline"}));
        return dddMerged.joinAndMergeWith(cosmic, singleKey("symbol"), "cosmic");
    }

    /** Projects and renames the human genes columns used as the base of the result. */
    private Dataset<Row> selectHumanGenes(Map<String, Dataset<Row>> map, SparkSession sparkSession) {
        Dataset<Row> source = (Dataset<Row>) map.apply(human_genes().id());
        Column[] columns = {
                column(sparkSession, "chromosome"),
                column(sparkSession, "symbol"),
                column(sparkSession, "entrez_gene_id"),
                column(sparkSession, "omim_gene_id"),
                column(sparkSession, "external_references.hgnc").as("hgnc"),
                column(sparkSession, "ensembl_gene_id"),
                column(sparkSession, "map_location").as("location"),
                column(sparkSession, "description").as("name"),
                column(sparkSession, "synonyms").as("alias"),
                // Dashes in the gene type are replaced with underscores.
                functions$.MODULE$.regexp_replace(column(sparkSession, "type_of_gene"), "-", "_").as("biotype")
        };
        return source.select(Predef$.MODULE$.wrapRefArray(columns));
    }

    /** Projects and renames the Orphanet gene set columns. */
    private Dataset<Row> selectOrphanet(Map<String, Dataset<Row>> map, SparkSession sparkSession) {
        Dataset<Row> source = (Dataset<Row>) map.apply(orphanet_gene_set().id());
        Column[] columns = {
                column(sparkSession, "gene_symbol").as("symbol"),
                column(sparkSession, "disorder_id"),
                column(sparkSession, "name").as("panel"),
                column(sparkSession, "type_of_inheritance").as("inheritance")
        };
        return source.select(Predef$.MODULE$.wrapRefArray(columns));
    }

    /** Keeps the OMIM rows that have a phenotype name and flattens the phenotype fields. */
    private Dataset<Row> selectOmim(Map<String, Dataset<Row>> map, SparkSession sparkSession) {
        Dataset<Row> source = (Dataset<Row>) map.apply(omim_gene_set().id());
        Dataset<Row> withPhenotype = source.where(column(sparkSession, "phenotype.name").isNotNull());
        Column[] columns = {
                column(sparkSession, "omim_gene_id"),
                column(sparkSession, "phenotype.name").as("name"),
                column(sparkSession, "phenotype.omim_id").as("omim_id"),
                column(sparkSession, "phenotype.inheritance").as("inheritance"),
                column(sparkSession, "phenotype.inheritance_code").as("inheritance_code")
        };
        return withPhenotype.select(Predef$.MODULE$.wrapRefArray(columns));
    }

    /** Selects the distinct HPO terms per gene and adds the "name (id)" label column. */
    private Dataset<Row> selectHpo(Map<String, Dataset<Row>> map, SparkSession sparkSession) {
        Dataset<Row> source = (Dataset<Row>) map.apply(hpo_gene_set().id());
        Column[] columns = {
                column(sparkSession, "entrez_gene_id"),
                column(sparkSession, "hpo_term_id"),
                column(sparkSession, "hpo_term_name")
        };
        Dataset<Row> distinctTerms = source.select(Predef$.MODULE$.wrapRefArray(columns)).distinct();
        Column label = functions$.MODULE$.concat(Predef$.MODULE$.wrapRefArray(new Column[]{
                column(sparkSession, "hpo_term_name"),
                functions$.MODULE$.lit(" ("),
                column(sparkSession, "hpo_term_id"),
                functions$.MODULE$.lit(")")
        }));
        return distinctTerms.withColumn("hpo_term_label", label);
    }

    /** Builds a column reference through the session's {@code StringToColumn} implicit. */
    private static Column column(SparkSession sparkSession, String name) {
        return sparkSession.implicits()
                .StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{name})))
                .$(Nil$.MODULE$);
    }

    /** Builds a one-element Scala sequence holding {@code name}, used as a join key list. */
    private static Seq<String> singleKey(String name) {
        return (Seq) new $colon.colon(name, Nil$.MODULE$);
    }

    /** Wraps {@code dataset} in a {@link DataFrameOps} bound to this instance. */
    public DataFrameOps DataFrameOps(Dataset<Row> dataset) {
        return new DataFrameOps(this, dataset);
    }

    /** Returns a {@code Coalesce} built with its first default argument as the repartition function. */
    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public Function1<Dataset<Row>, Dataset<Row>> defaultRepartition() {
        return new Coalesce(
                Coalesce$.MODULE$.apply$default$1());
    }

    /**
     * Resolves the destination and source dataset configurations by id.
     *
     * @param configuration the configuration handed to the parent ETL
     */
    public Genes(Configuration configuration) {
        super(configuration);
        // Destination.
        this.mainDestination = super.conf().getDataset("enriched_genes");
        // Sources.
        this.omim_gene_set = super.conf().getDataset("normalized_omim_gene_set");
        this.orphanet_gene_set = super.conf().getDataset("normalized_orphanet_gene_set");
        this.hpo_gene_set = super.conf().getDataset("normalized_hpo_gene_set");
        this.human_genes = super.conf().getDataset("normalized_human_genes");
        this.ddd_gene_set = super.conf().getDataset("normalized_ddd_gene_set");
        this.cosmic_gene_set = super.conf().getDataset("normalized_cosmic_gene_set");
    }
}
