package org.apache.spark.examples.mllib;

import com.google.common.io.Files;
import java.io.File;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.examples.mllib.DatasetExample;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer;
import org.apache.spark.mllib.util.MLUtils$;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;
import scala.MatchError;
import scala.Predef$;
import scala.StringContext;
import scala.collection.immutable.Nil$;
import scala.reflect.ClassTag$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxesRunTime;
import scopt.OptionParser;
import scopt.Read$;

/* compiled from: DatasetExample.scala */
/* loaded from: input_file:org/apache/spark/examples/mllib/DatasetExample$.class */
/**
 * Singleton companion for {@code DatasetExample} (decompiled from {@code DatasetExample.scala}).
 *
 * <p>{@link #main(String[])} parses the command line into a {@code DatasetExample.Params};
 * {@link #run(DatasetExample.Params)} loads a labeled dataset, converts it to a
 * {@link DataFrame}, prints a few summary statistics, and round-trips the data through a
 * Parquet file.
 */
public final class DatasetExample$ {
    /** The single shared instance, created when this class is initialised. */
    public static final DatasetExample$ MODULE$ = new DatasetExample$();

    private DatasetExample$() {
    }

    /**
     * Builds an interpolated string through Scala's {@code StringContext.s}, i.e. the literal
     * {@code parts} interleaved with {@code args}.
     *
     * @param parts literal fragments; there is one more fragment than there are arguments
     * @param args  values spliced between consecutive fragments
     * @return the interpolated string
     */
    private static String interpolate(String[] parts, Object... args) {
        return new StringContext(Predef$.MODULE$.wrapRefArray(parts)).s(Predef$.MODULE$.genericWrapArray(args));
    }

    /**
     * Command-line entry point.
     *
     * <p>Configures a scopt {@link OptionParser} with the {@code input} and {@code dataFormat}
     * options plus a config check, parses {@code strArr} starting from the default
     * {@code Params}, and hands the parse result to {@code DatasetExample$$anonfun$main$1}
     * (or to {@code DatasetExample$$anonfun$main$2} when parsing yields nothing).
     * NOTE(review): those function bodies are not visible here; presumably they call
     * {@link #run(DatasetExample.Params)} and exit with an error respectively - confirm.
     *
     * @param strArr raw command-line arguments
     */
    public void main(String[] strArr) {
        OptionParser<DatasetExample.Params> parser = new OptionParser<DatasetExample.Params>() { // from class: org.apache.spark.examples.mllib.DatasetExample$$anon$1
            {
                head(Predef$.MODULE$.wrapRefArray(new String[]{"Dataset: an example app using DataFrame as a Dataset for ML."}));
                opt("input", Read$.MODULE$.stringRead()).text(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"input path to dataset"})).s(Nil$.MODULE$)).action(new DatasetExample$$anon$1$$anonfun$2(this));
                opt("dataFormat", Read$.MODULE$.stringRead()).text("data format: libsvm (default), dense (deprecated in Spark v1.1)").action(new DatasetExample$$anon$1$$anonfun$3(this));
                checkConfig(new DatasetExample$$anon$1$$anonfun$4(this));
            }
        };
        DatasetExample.Params defaults = new DatasetExample.Params(DatasetExample$Params$.MODULE$.apply$default$1(), DatasetExample$Params$.MODULE$.apply$default$2());
        parser.parse(Predef$.MODULE$.wrapRefArray(strArr), defaults).map(new DatasetExample$$anonfun$main$1()).getOrElse(new DatasetExample$$anonfun$main$2());
    }

    /**
     * Runs the example for the given parameters.
     *
     * <p>Steps, in order: create a {@link SparkContext} and {@link SQLContext}; load the input
     * file as labeled points according to {@code params.dataFormat()}; convert the RDD to a
     * {@link DataFrame}; print its schema, record count, mean label and per-feature means; save
     * it as Parquet under a fresh temporary directory; load it back and print the schema and
     * per-feature means again. The Spark context is stopped on every exit path.
     *
     * @param params parsed options; {@code input()} is the dataset path and
     *               {@code dataFormat()} must be {@code "dense"} or {@code "libsvm"}
     * @throws MatchError if {@code params.dataFormat()} is neither {@code "dense"} nor
     *                    {@code "libsvm"} (including {@code null})
     */
    public void run(DatasetExample.Params params) {
        SparkContext sparkContext = new SparkContext(new SparkConf().setAppName(interpolate(new String[]{"DatasetExample with ", ""}, params)));
        try {
            SQLContext sQLContext = new SQLContext(sparkContext);

            // Pick the loader that matches the requested on-disk format.
            String dataFormat = params.dataFormat();
            RDD points;
            if ("dense".equals(dataFormat)) {
                points = MLUtils$.MODULE$.loadLabeledPoints(sparkContext, params.input());
            } else if ("libsvm".equals(dataFormat)) {
                points = MLUtils$.MODULE$.loadLibSVMFile(sparkContext, params.input());
            } else {
                throw new MatchError(dataFormat);
            }
            Predef$.MODULE$.println(interpolate(new String[]{"Loaded ", " instances from file: ", ""}, BoxesRunTime.boxToLong(points.count()), params.input()));

            // The TypeTag tells Spark SQL to derive the schema from LabeledPoint.
            DataFrame df = sQLContext.implicits().rddToDataFrameHolder(points, package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: org.apache.spark.examples.mllib.DatasetExample$$typecreator2$1
                public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                    mirror.universe();
                    return mirror.staticClass("org.apache.spark.mllib.regression.LabeledPoint").asType().toTypeConstructor();
                }
            })).toDF();
            Predef$.MODULE$.println(interpolate(new String[]{"Inferred schema:\\n", ""}, df.schema().prettyJson()));
            Predef$.MODULE$.println(interpolate(new String[]{"Converted to DataFrame with ", " records"}, BoxesRunTime.boxToLong(df.count())));

            // Mean label = sum of the label column / number of rows. The mapped RDD is kept in
            // a local because it is consumed twice (fold, then count).
            RDD labelValues = df.select("label", Predef$.MODULE$.wrapRefArray(new String[0])).map(new DatasetExample$$anonfun$5(), ClassTag$.MODULE$.Double());
            double labelSum = BoxesRunTime.unboxToDouble(labelValues.fold(BoxesRunTime.boxToDouble(0.0d), new DatasetExample$$anonfun$1()));
            double meanLabel = labelSum / labelValues.count();
            Predef$.MODULE$.println(interpolate(new String[]{"Selected label column with average value ", ""}, BoxesRunTime.boxToDouble(meanLabel)));

            // Per-feature means, accumulated with a MultivariateOnlineSummarizer.
            MultivariateOnlineSummarizer featureSummary = (MultivariateOnlineSummarizer) df.select("features", Predef$.MODULE$.wrapRefArray(new String[0])).map(new DatasetExample$$anonfun$6(), ClassTag$.MODULE$.apply(Vector.class)).aggregate(new MultivariateOnlineSummarizer(), new DatasetExample$$anonfun$7(), new DatasetExample$$anonfun$8(), ClassTag$.MODULE$.apply(MultivariateOnlineSummarizer.class));
            Predef$.MODULE$.println(interpolate(new String[]{"Selected features column with average values:\\n ", ""}, featureSummary.mean().toString()));

            // Write the DataFrame to <tempDir>/dataset as Parquet.
            // NOTE(review): deleteOnExit() is registered on the directory itself; it will not
            // remove the directory if it still contains the Parquet output at JVM exit - confirm
            // whether leftover temp data is acceptable.
            File createTempDir = Files.createTempDir();
            createTempDir.deleteOnExit();
            String outputPath = new File(createTempDir, "dataset").toString();
            Predef$.MODULE$.println(interpolate(new String[]{"Saving to ", " as Parquet file."}, outputPath));
            df.write().parquet(outputPath);

            // Read the data back and repeat the feature summary on the reloaded DataFrame.
            Predef$.MODULE$.println(interpolate(new String[]{"Loading Parquet file with UDT from ", "."}, outputPath));
            DataFrame reloaded = sQLContext.read().parquet(Predef$.MODULE$.wrapRefArray(new String[]{outputPath}));
            Predef$.MODULE$.println(interpolate(new String[]{"Schema from Parquet: ", ""}, reloaded.schema().prettyJson()));
            MultivariateOnlineSummarizer reloadedSummary = (MultivariateOnlineSummarizer) reloaded.select("features", Predef$.MODULE$.wrapRefArray(new String[0])).map(new DatasetExample$$anonfun$9(), ClassTag$.MODULE$.apply(Vector.class)).aggregate(new MultivariateOnlineSummarizer(), new DatasetExample$$anonfun$10(), new DatasetExample$$anonfun$11(), ClassTag$.MODULE$.apply(MultivariateOnlineSummarizer.class));
            Predef$.MODULE$.println(interpolate(new String[]{"Selected features column with average values:\\n ", ""}, reloadedSummary.mean().toString()));
        } finally {
            // Release the context even when loading or a Spark job fails part-way through.
            sparkContext.stop();
        }
    }
}
