package io.smartdatalake.workflow.dataobject;

import io.smartdatalake.util.hdfs.PartitionValues;
import io.smartdatalake.util.hdfs.PartitionValues$;
import io.smartdatalake.util.hdfs.SparkRepartitionDef;
import io.smartdatalake.util.misc.DataFrameUtil$;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StructType;
import scala.Option;
import scala.Predef$;
import scala.StringContext;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;

/* compiled from: SparkFileDataObject.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005Ud\u0001C\u0001\u0003!\u0003\r\tA\u0002\u0006\u0003'M\u0003\u0018M]6GS2,G)\u0019;b\u001f\nTWm\u0019;\u000b\u0005\r!\u0011A\u00033bi\u0006|'M[3di*\u0011QAB\u0001\to>\u00148N\u001a7po*\u0011q\u0001C\u0001\u000eg6\f'\u000f\u001e3bi\u0006d\u0017m[3\u000b\u0003%\t!![8\u0014\u0011\u0001Y\u0011#\u0006\r\u001c=\u0005\u0002\"\u0001D\b\u000e\u00035Q\u0011AD\u0001\u0006g\u000e\fG.Y\u0005\u0003!5\u0011a!\u00118z%\u00164\u0007C\u0001\n\u0014\u001b\u0005\u0011\u0011B\u0001\u000b\u0003\u0005QA\u0015\rZ8pa\u001aKG.\u001a#bi\u0006|%M[3diB\u0011!CF\u0005\u0003/\t\u0011!cQ1o\u0007J,\u0017\r^3ECR\fgI]1nKB\u0011!#G\u0005\u00035\t\u0011\u0011cQ1o/JLG/\u001a#bi\u00064%/Y7f!\t\u0011B$\u0003\u0002\u001e\u0005\tY2)\u00198De\u0016\fG/Z*ue\u0016\fW.\u001b8h\t\u0006$\u0018M\u0012:b[\u0016\u0004\"AE\u0010\n\u0005\u0001\u0012!!E+tKJ$UMZ5oK\u0012\u001c6\r[3nCB\u0011!CI\u0005\u0003G\t\u0011\u0001cU2iK6\fg+\u00197jI\u0006$\u0018n\u001c8\t\u000b\u0015\u0002A\u0011A\u0014\u0002\r\u0011Jg.\u001b;%\u0007\u0001!\u0012\u0001\u000b\t\u0003\u0019%J!AK\u0007\u0003\tUs\u0017\u000e\u001e\u0005\u0006Y\u00011\t!L\u0001\u0007M>\u0014X.\u0019;\u0016\u00039\u0002\"a\f\u001a\u000f\u00051\u0001\u0014BA\u0019\u000e\u0003\u0019\u0001&/\u001a3fM&\u00111\u0007\u000e\u0002\u0007'R\u0014\u0018N\\4\u000b\u0005Ej\u0001\"\u0002\u001c\u0001\t\u00039\u0014aB8qi&|gn]\u000b\u0002qA!q&\u000f\u0018/\u0013\tQDGA\u0002NCBDQ\u0001\u0010\u0001\u0007\u0002u\naBZ5mK:\fW.Z\"pYVlg.F\u0001?!\raqHL\u0005\u0003\u00016\u0011aa\u00149uS>t\u0007\"\u0002\"\u0001\r\u0003\u0019\u0015\u0001E:qCJ\\'+\u001a9beRLG/[8o+\u0005!\u0005c\u0001\u0007@\u000bB\u0011aiS\u0007\u0002\u000f*\u0011\u0001*S\u0001\u0005Q\u001247O\u0003\u0002K\r\u0005!Q\u000f^5m\u0013\tauIA\nTa\u0006\u00148NU3qCJ$\u0018\u000e^5p]\u0012+g\rC\u0003O\u0001\u0011\u0005q*A\u0006cK\u001a|'/Z,sSR,GC\u0001)k!\t\tvM\u0004\u0002SI:\u00111+\u0019\b\u0003)zs!!V.\u000f\u0005YKV\"A,\u000b\u0005a3\u0013A\u0002\u001fs_>$h(C\u0001[\u0003\ry'oZ\u0005\u00039v\u
000ba!\u00199bG\",'\"\u0001.\n\u0005}\u0003\u0017!B:qCJ\\'B\u0001/^\u0013\t\u00117-A\u0002tc2T!a\u00181\n\u0005\u00154\u0017a\u00029bG.\fw-\u001a\u0006\u0003E\u000eL!\u0001[5\u0003\u0013\u0011\u000bG/\u0019$sC6,'BA3g\u0011\u0015YW\n1\u0001Q\u0003\t!g\rC\u0003n\u0001\u0011\u0005a.A\u0005bMR,'OU3bIR\u0011\u0001k\u001c\u0005\u0006W2\u0004\r\u0001\u0015\u0005\u0006c\u0002!\tA]\u0001\u000be\u0016\fGmU2iK6\fGCA:{!\raq\b\u001e\t\u0003kbl\u0011A\u001e\u0006\u0003o\u001a\fQ\u0001^=qKNL!!\u001f<\u0003\u0015M#(/^2u)f\u0004X\rC\u0003|a\u0002\u0007A0\u0001\u0007t_V\u00148-Z#ySN$8\u000f\u0005\u0002\r{&\u0011a0\u0004\u0002\b\u0005>|G.Z1o\u0011\u001d\t\t\u0001\u0001C!\u0003\u0007\tAbZ3u\t\u0006$\u0018M\u0012:b[\u0016$B!!\u0002\u0002\u0014Q\u0019\u0001+a\u0002\t\u000f\u0005%q\u0010q\u0001\u0002\f\u000591/Z:tS>t\u0007\u0003BA\u0007\u0003\u001fi\u0011AZ\u0005\u0004\u0003#1'\u0001D*qCJ\\7+Z:tS>t\u0007\"CA\u000b\u007fB\u0005\t\u0019AA\f\u0003=\u0001\u0018M\u001d;ji&|gNV1mk\u0016\u001c\bCBA\r\u0003C\t9C\u0004\u0003\u0002\u001c\u0005}ab\u0001,\u0002\u001e%\ta\"\u0003\u0002f\u001b%!\u00111EA\u0013\u0005\r\u0019V-\u001d\u0006\u0003K6\u00012ARA\u0015\u0013\r\tYc\u0012\u0002\u0010!\u0006\u0014H/\u001b;j_:4\u0016\r\\;fg\"9\u0011q\u0006\u0001\u0005B\u0005E\u0012!F4fiN#(/Z1nS:<G)\u0019;b\rJ\fW.\u001a\u000b\u0007\u0003g\t9$!\u000f\u0015\u0007A\u000b)\u0004\u0003\u0005\u0002\n\u00055\u00029AA\u0006\u0011\u00191\u0014Q\u0006a\u0001q!9\u00111HA\u0017\u0001\u0004\u0019\u0018A\u00049ja\u0016d\u0017N\\3TG\",W.\u0019\u0005\b\u0003\u007f\u0001A\u0011IA!\u000399(/\u001b;f\t\u0006$\u0018M\u0012:b[\u0016$\u0002\"a\u0011\u0002H\u0005%\u00131\n\u000b\u0004Q\u0005\u0015\u0003\u0002CA\u0005\u0003{\u0001\u001d!a\u0003\t\r-\fi\u00041\u0001Q\u0011)\t)\"!\u0010\u0011\u0002\u0003\u0007\u0011q\u0003\u0005\n\u0003\u001b\ni\u0004%AA\u0002q\f\u0001#[:SK\u000e,(o]5wK&s\u0007/\u001e;\t\u0013\u0005E\u0003!%A\u0005B\u0005M\u0013AF4fi\u0012\u000bG/\u0019$sC6,G\u0005Z3gCVdG\u000fJ\u0019\u0016\u0005\u0005U#\u0006BA\f\u0003/Z#!
!\u0017\u0011\t\u0005m\u0013QM\u0007\u0003\u0003;RA!a\u0018\u0002b\u0005IQO\\2iK\u000e\\W\r\u001a\u0006\u0004\u0003Gj\u0011AC1o]>$\u0018\r^5p]&!\u0011qMA/\u0005E)hn\u00195fG.,GMV1sS\u0006t7-\u001a\u0005\n\u0003W\u0002\u0011\u0013!C!\u0003'\n\u0001d\u001e:ji\u0016$\u0015\r^1Ge\u0006lW\r\n3fM\u0006,H\u000e\u001e\u00133\u0011%\ty\u0007AI\u0001\n\u0003\n\t(\u0001\rxe&$X\rR1uC\u001a\u0013\u0018-\\3%I\u00164\u0017-\u001e7uIM*\"!a\u001d+\u0007q\f9\u0006")
/* loaded from: input_file:io/smartdatalake/workflow/dataobject/SparkFileDataObject.class */
public interface SparkFileDataObject extends HadoopFileDataObject, CanCreateDataFrame, CanWriteDataFrame, CanCreateStreamingDataFrame, UserDefinedSchema, SchemaValidation {

    /* compiled from: SparkFileDataObject.scala */
    /* renamed from: io.smartdatalake.workflow.dataobject.SparkFileDataObject$class */
    /* loaded from: input_file:io/smartdatalake/workflow/dataobject/SparkFileDataObject$class.class */
    public abstract class Cclass {
        /**
         * Static implementation backing {@link SparkFileDataObject#options()}.
         *
         * @param sparkFileDataObject the trait instance; not consulted by this implementation
         * @return the map obtained by applying Scala's {@code Predef.Map} factory to {@code Nil},
         *         i.e. a map built from no entries
         */
        public static Map options(SparkFileDataObject sparkFileDataObject) {
            final Map noEntries = Predef$.MODULE$.Map().apply(Nil$.MODULE$);
            return noEntries;
        }

        /**
         * Static implementation backing {@link SparkFileDataObject#beforeWrite(Dataset)}.
         *
         * <p>Calls {@code validateSchemaMin} on the trait instance with the given dataset and then
         * hands the very same dataset reference back; no transformation is applied here.
         * NOTE(review): {@code validateSchemaMin} is declared outside this file; presumably it
         * throws when the dataset's schema misses required columns - confirm.
         *
         * @param sparkFileDataObject the trait instance whose schema validation is invoked
         * @param dataset             the dataset about to be written
         * @return {@code dataset}, unchanged
         */
        public static Dataset beforeWrite(SparkFileDataObject sparkFileDataObject, Dataset dataset) {
            final Dataset toWrite = dataset;
            sparkFileDataObject.validateSchemaMin(toWrite);
            return toWrite;
        }

        /**
         * Static implementation backing {@link SparkFileDataObject#afterRead(Dataset)}.
         *
         * <p>Calls {@code validateSchemaMin} on the trait instance with the given dataset and then
         * hands the very same dataset reference back; no transformation is applied here.
         * NOTE(review): {@code validateSchemaMin} is declared outside this file; presumably it
         * throws when the dataset's schema misses required columns - confirm.
         *
         * @param sparkFileDataObject the trait instance whose schema validation is invoked
         * @param dataset             the dataset that has just been read
         * @return {@code dataset}, unchanged
         */
        public static Dataset afterRead(SparkFileDataObject sparkFileDataObject, Dataset dataset) {
            final Dataset justRead = dataset;
            sparkFileDataObject.validateSchemaMin(justRead);
            return justRead;
        }

        /**
         * Static implementation backing {@link SparkFileDataObject#readSchema(boolean)}.
         *
         * @param sparkFileDataObject the trait instance whose {@code schema()} is returned
         * @param z                   ignored by this implementation
         * @return whatever {@code sparkFileDataObject.schema()} yields
         */
        public static Option readSchema(SparkFileDataObject sparkFileDataObject, boolean z) {
            final Option configuredSchema = sparkFileDataObject.schema();
            return configuredSchema;
        }

        /**
         * Static implementation backing {@link SparkFileDataObject#getDataFrame(Seq, SparkSession)}.
         *
         * <p>Steps, in order:
         * <ol>
         *   <li>Asserts that {@code PartitionValues.checkWrongPartitionValues(seq, partitions())}
         *       returns an empty sequence.</li>
         *   <li>Asks {@code checkFilesExisting}; when it reports {@code false}, requires
         *       {@code schema()} to be defined and calls {@code mkdirs} on {@code hadoopPath()}.</li>
         *   <li>If {@code partitions()} or {@code seq} is empty, loads {@code hadoopPath()} with a
         *       single reader call. Otherwise maps {@code seq} through synthetic closures and
         *       reduces the results into one dataset, with the reader option {@code basePath}
         *       set to {@code hadoopPath()}.</li>
         *   <li>Applies {@code withOptionalColumn(filenameColumn(), input_file_name())} and passes
         *       the result through {@code afterRead}.</li>
         * </ol>
         *
         * @param sparkFileDataObject the trait instance supplying format, options, paths and hooks
         * @param seq                 partition values selecting what to read; may be empty
         * @param sparkSession        session used for file system access and reading
         * @return the dataset returned by {@code afterRead}
         */
        public static Dataset getDataFrame(SparkFileDataObject sparkFileDataObject, Seq seq, SparkSession sparkSession) {
            final Seq<String> rejected = PartitionValues$.MODULE$.checkWrongPartitionValues(seq, sparkFileDataObject.partitions());
            Predef$.MODULE$.assert(rejected.isEmpty(), new SparkFileDataObject$$anonfun$getDataFrame$1(sparkFileDataObject, rejected));

            final boolean filesPresent = sparkFileDataObject.checkFilesExisting(sparkSession);
            if (!filesPresent) {
                // Without existing files a schema has to be configured; the directory is created as well.
                // NOTE(review): presumably so the load below does not fail on a missing path - confirm.
                Predef$.MODULE$.require(sparkFileDataObject.schema().isDefined(), new SparkFileDataObject$$anonfun$getDataFrame$2(sparkFileDataObject));
                sparkFileDataObject.filesystem(sparkSession).mkdirs(sparkFileDataObject.hadoopPath());
            }

            final boolean readPerPartitionValue = !sparkFileDataObject.partitions().isEmpty() && !seq.isEmpty();
            final Dataset<Row> loaded;
            if (readPerPartitionValue) {
                // NOTE(review): the $$anonfun$3/4/5 closures are defined elsewhere; presumably they
                // build a path per partition value, load each path, and union the frames - confirm.
                final Seq perValueInputs = (Seq) seq.map(
                        new SparkFileDataObject$$anonfun$3(sparkFileDataObject, sparkSession),
                        Seq$.MODULE$.canBuildFrom());
                final TraversableOnce perValueFrames = (TraversableOnce) perValueInputs.map(
                        new SparkFileDataObject$$anonfun$4(
                                sparkFileDataObject,
                                DataFrameUtil$.MODULE$
                                        .DataFrameReaderUtils(sparkSession.read().format(sparkFileDataObject.format()).options(sparkFileDataObject.options()))
                                        .optionalSchema(sparkFileDataObject.readSchema(filesPresent))
                                        .option("basePath", sparkFileDataObject.hadoopPath().toString())),
                        Seq$.MODULE$.canBuildFrom());
                loaded = (Dataset) perValueFrames.reduce(new SparkFileDataObject$$anonfun$5(sparkFileDataObject));
            } else {
                loaded = DataFrameUtil$.MODULE$
                        .DataFrameReaderUtils(sparkSession.read().format(sparkFileDataObject.format()).options(sparkFileDataObject.options()))
                        .optionalSchema(sparkFileDataObject.readSchema(filesPresent))
                        .load(sparkFileDataObject.hadoopPath().toString());
            }

            return sparkFileDataObject.afterRead(
                    DataFrameUtil$.MODULE$.DfSDL(loaded).withOptionalColumn(
                            sparkFileDataObject.filenameColumn(),
                            functions$.MODULE$.input_file_name()));
        }

        /**
         * Supplies the default value for the first parameter of
         * {@link SparkFileDataObject#getDataFrame(Seq, SparkSession)}.
         *
         * @param sparkFileDataObject the trait instance; not consulted
         * @return the sequence obtained by applying Scala's {@code Seq} factory to {@code Nil},
         *         i.e. a sequence built from no elements
         */
        public static Seq getDataFrame$default$1(SparkFileDataObject sparkFileDataObject) {
            final Seq noPartitionValues = Seq$.MODULE$.apply(Nil$.MODULE$);
            return noPartitionValues;
        }

        /**
         * Static implementation backing
         * {@link SparkFileDataObject#getStreamingDataFrame(Map, Option, SparkSession)}.
         *
         * <p>Requires that {@code schema()}, with a fallback supplied by a synthetic closure over
         * {@code option}, is defined. It then calls {@code mkdirs} on {@code hadoopPath()} when the
         * parent of {@code hadoopPath()} does not exist. Finally it opens a streaming reader with
         * {@code format()}, the caller-supplied {@code map} as options and the resolved schema,
         * loads {@code hadoopPath()} and passes the result through {@code afterRead}.
         *
         * <p>NOTE(review): existence is tested on the <em>parent</em> of {@code hadoopPath()} while
         * {@code mkdirs} targets {@code hadoopPath()} itself, and the object's own
         * {@code options()} are not applied here (only {@code map}) - confirm both are intended.
         *
         * @param sparkFileDataObject the trait instance supplying format, schema, paths and hooks
         * @param map                 options handed to the streaming reader
         * @param option              value captured by the fallback closures used with {@code schema().orElse(...)}
         * @param sparkSession        session used for file system access and reading
         * @return the dataset returned by {@code afterRead}
         */
        public static Dataset getStreamingDataFrame(SparkFileDataObject sparkFileDataObject, Map map, Option option, SparkSession sparkSession) {
            final boolean schemaResolvable = sparkFileDataObject.schema()
                    .orElse(new SparkFileDataObject$$anonfun$getStreamingDataFrame$2(sparkFileDataObject, option))
                    .isDefined();
            Predef$.MODULE$.require(schemaResolvable, new SparkFileDataObject$$anonfun$getStreamingDataFrame$1(sparkFileDataObject));

            final boolean parentPresent = sparkFileDataObject.filesystem(sparkSession).exists(sparkFileDataObject.hadoopPath().getParent());
            if (!parentPresent) {
                sparkFileDataObject.filesystem(sparkSession).mkdirs(sparkFileDataObject.hadoopPath());
            }

            return sparkFileDataObject.afterRead(
                    sparkSession.readStream()
                            .format(sparkFileDataObject.format())
                            .options(map)
                            .schema((StructType) sparkFileDataObject.schema().orElse(new SparkFileDataObject$$anonfun$6(sparkFileDataObject, option)).get())
                            .load(sparkFileDataObject.hadoopPath().toString()));
        }

        /**
         * Static implementation backing
         * {@link SparkFileDataObject#writeDataFrame(Dataset, Seq, boolean, SparkSession)}.
         *
         * <p>Steps, in order:
         * <ol>
         *   <li>Requires {@code z} to be {@code false}.</li>
         *   <li>Runs {@code beforeWrite} and stores the result in an {@link ObjectRef} that the
         *       synthetic closures share.</li>
         *   <li>Replaces the held dataset with the result of mapping {@code sparkRepartition()}
         *       through a closure, or a fallback closure when no repartition definition exists.</li>
         *   <li>Logs the target path and writes with {@code format()}, {@code saveMode()},
         *       {@code options()} and {@code optionalPartitionBy(partitions())}.</li>
         *   <li>Calls {@code createMissingPartitions(seq, sparkSession)}.</li>
         *   <li>Runs a post-write closure over {@code sparkRepartition().flatMap(...)}.</li>
         * </ol>
         * NOTE(review): the {@code $$anonfun$writeDataFrame$N} closures are defined elsewhere, so
         * what the repartitioning and the post-write step actually do cannot be told from here.
         *
         * @param sparkFileDataObject the trait instance supplying format, options, paths and hooks
         * @param dataset             the dataset to write
         * @param seq                 partition values passed to the repartition closure and to
         *                            {@code createMissingPartitions}
         * @param z                   must be {@code false}; otherwise {@code require} fails
         * @param sparkSession        session passed to {@code createMissingPartitions} and the post-write closure
         */
        public static void writeDataFrame(SparkFileDataObject sparkFileDataObject, Dataset dataset, Seq seq, boolean z, SparkSession sparkSession) {
            Predef$.MODULE$.require(!z, new SparkFileDataObject$$anonfun$writeDataFrame$1(sparkFileDataObject));

            // The closures below receive this holder, so it must stay an ObjectRef rather than a plain local.
            final ObjectRef frameHolder = ObjectRef.create(sparkFileDataObject.beforeWrite(dataset));
            frameHolder.elem = (Dataset) sparkFileDataObject.sparkRepartition()
                    .map(new SparkFileDataObject$$anonfun$writeDataFrame$2(sparkFileDataObject, frameHolder, seq))
                    .getOrElse(new SparkFileDataObject$$anonfun$writeDataFrame$3(sparkFileDataObject, frameHolder));

            final String targetPath = sparkFileDataObject.hadoopPath().toString();
            sparkFileDataObject.logger().info(
                    new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Writing data frame to ", ""}))
                            .s(Predef$.MODULE$.genericWrapArray(new Object[]{targetPath})));

            DataFrameUtil$.MODULE$
                    .DataFrameWriterUtils(
                            ((Dataset) frameHolder.elem).write()
                                    .format(sparkFileDataObject.format())
                                    .mode(sparkFileDataObject.saveMode())
                                    .options(sparkFileDataObject.options()))
                    .optionalPartitionBy(sparkFileDataObject.partitions())
                    .save(targetPath);

            sparkFileDataObject.createMissingPartitions(seq, sparkSession);

            sparkFileDataObject.sparkRepartition()
                    .flatMap(new SparkFileDataObject$$anonfun$writeDataFrame$4(sparkFileDataObject))
                    .foreach(new SparkFileDataObject$$anonfun$writeDataFrame$5(sparkFileDataObject, sparkSession));
        }

        /**
         * Supplies the default value for the second parameter of
         * {@link SparkFileDataObject#writeDataFrame(Dataset, Seq, boolean, SparkSession)}.
         *
         * @param sparkFileDataObject the trait instance; not consulted
         * @return the sequence obtained by applying Scala's {@code Seq} factory to {@code Nil},
         *         i.e. a sequence built from no elements
         */
        public static Seq writeDataFrame$default$2(SparkFileDataObject sparkFileDataObject) {
            final Seq noPartitionValues = Seq$.MODULE$.apply(Nil$.MODULE$);
            return noPartitionValues;
        }

        /**
         * Supplies the default value for the third (boolean) parameter of
         * {@link SparkFileDataObject#writeDataFrame(Dataset, Seq, boolean, SparkSession)}.
         *
         * @param sparkFileDataObject the trait instance; not consulted
         * @return always {@code false}
         */
        public static boolean writeDataFrame$default$3(SparkFileDataObject sparkFileDataObject) {
            final boolean defaultFlag = false;
            return defaultFlag;
        }

        /**
         * Trait initializer, run for every instance mixing in {@link SparkFileDataObject}.
         *
         * <p>Asserts that at least one of the following holds: the value derived from
         * {@code sparkRepartition()} via a synthetic closure is empty, or {@code partitions()} is
         * empty. {@code partitions()} is only evaluated when the first condition is false.
         * NOTE(review): {@code $$anonfun$2} is defined elsewhere; presumably it extracts a
         * repartition setting that cannot be combined with partitioned output - confirm.
         *
         * @param sparkFileDataObject the instance being initialized
         */
        public static void $init$(SparkFileDataObject sparkFileDataObject) {
            final boolean derivedSettingAbsent = sparkFileDataObject.sparkRepartition()
                    .flatMap(new SparkFileDataObject$$anonfun$2(sparkFileDataObject))
                    .isEmpty();
            Predef$.MODULE$.assert(
                    derivedSettingAbsent || sparkFileDataObject.partitions().isEmpty(),
                    new SparkFileDataObject$$anonfun$1(sparkFileDataObject));
        }
    }

    /**
     * Format name handed to Spark's {@code format(...)} by the static implementations of
     * {@code getDataFrame}, {@code getStreamingDataFrame} and {@code writeDataFrame} in
     * {@code Cclass}.
     */
    String format();

    /**
     * Reader/writer options applied via {@code options(...)} in {@code Cclass.getDataFrame} and
     * {@code Cclass.writeDataFrame}. {@code Cclass.options} builds a map from no entries.
     */
    Map<String, String> options();

    /**
     * Optional column specification passed, together with {@code input_file_name()}, to
     * {@code withOptionalColumn} at the end of {@code Cclass.getDataFrame}.
     */
    Option<String> filenameColumn();

    /**
     * Optional repartition definition. It is consulted by {@code Cclass.writeDataFrame} before and
     * after the write, and by the assertion in {@code Cclass.$init$}.
     */
    Option<SparkRepartitionDef> sparkRepartition();

    /**
     * Hook invoked by {@code Cclass.writeDataFrame} before writing. {@code Cclass.beforeWrite}
     * calls {@code validateSchemaMin} and returns the dataset unchanged.
     */
    Dataset<Row> beforeWrite(Dataset<Row> dataset);

    /**
     * Hook invoked on the result of {@code Cclass.getDataFrame} and
     * {@code Cclass.getStreamingDataFrame}. {@code Cclass.afterRead} calls
     * {@code validateSchemaMin} and returns the dataset unchanged.
     */
    Dataset<Row> afterRead(Dataset<Row> dataset);

    /**
     * Schema handed to the reader's {@code optionalSchema(...)} in {@code Cclass.getDataFrame},
     * which passes the result of {@code checkFilesExisting} as {@code z}.
     * {@code Cclass.readSchema} ignores {@code z} and returns {@code schema()}.
     */
    Option<StructType> readSchema(boolean z);

    /**
     * Reads this object's files into a dataset; see {@code Cclass.getDataFrame} for the steps.
     *
     * @param seq          partition values selecting what to read; may be empty
     * @param sparkSession session used for file system access and reading
     */
    Dataset<Row> getDataFrame(Seq<PartitionValues> seq, SparkSession sparkSession);

    /** Default for the first parameter of {@code getDataFrame}; {@code Cclass} builds a sequence from no elements. */
    Seq<PartitionValues> getDataFrame$default$1();

    /**
     * Opens a streaming read over this object's path; see {@code Cclass.getStreamingDataFrame}.
     *
     * @param map          options handed to the streaming reader
     * @param option       optional schema captured by the fallback used when {@code schema()} is consulted
     * @param sparkSession session used for file system access and reading
     */
    Dataset<Row> getStreamingDataFrame(Map<String, String> map, Option<StructType> option, SparkSession sparkSession);

    /**
     * Writes the dataset to this object's path; see {@code Cclass.writeDataFrame} for the steps.
     *
     * @param dataset      the dataset to write
     * @param seq          partition values forwarded to {@code createMissingPartitions}
     * @param z            {@code Cclass.writeDataFrame} requires this to be {@code false}
     * @param sparkSession session forwarded to {@code createMissingPartitions}
     */
    void writeDataFrame(Dataset<Row> dataset, Seq<PartitionValues> seq, boolean z, SparkSession sparkSession);

    /** Default for the second parameter of {@code writeDataFrame}; {@code Cclass} builds a sequence from no elements. */
    Seq<PartitionValues> writeDataFrame$default$2();

    /** Default for the third parameter of {@code writeDataFrame}; {@code Cclass} returns {@code false}. */
    boolean writeDataFrame$default$3();
}
