package pipelines.nlp;

import nodes.nlp.NGramsCounts;
import nodes.nlp.NGramsCountsMode$;
import nodes.nlp.NGramsFeaturizer$mcI$sp;
import nodes.nlp.StupidBackoffEstimator$;
import nodes.nlp.StupidBackoffEstimator$mcI$sp;
import nodes.nlp.StupidBackoffModel;
import nodes.nlp.Tokenizer;
import nodes.nlp.Tokenizer$;
import nodes.nlp.WordFrequencyEncoder$;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;
import pipelines.nlp.StupidBackoffPipeline;
import scala.Predef$;
import scala.StringContext;
import scala.collection.Seq;
import scala.collection.immutable.StringOps;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.runtime.RichInt$;
import scopt.OptionParser;
import scopt.Read$;

/* compiled from: StupidBackoffPipeline.scala */
/* loaded from: input_file:pipelines/nlp/StupidBackoffPipeline$.class */
/**
 * Singleton holder (Scala module class) for the stupid-backoff language-model pipeline driver.
 *
 * <p>Exposes the application name, a command-line parser that produces a
 * {@code StupidBackoffPipeline.StupidBackoffConfig}, and a {@code main} entry point that
 * runs the pipeline on Spark and prints summary statistics.
 */
public final class StupidBackoffPipeline$ {
    /**
     * The single instance. Initialised directly here because a {@code static final}
     * field cannot legally be assigned from inside the constructor.
     */
    public static final StupidBackoffPipeline$ MODULE$ = new StupidBackoffPipeline$();

    private final String appName;

    /**
     * Returns the application name, used both as the parser's header text and as the
     * Spark application name.
     */
    public String appName() {
        return this.appName;
    }

    /**
     * Parses command-line arguments into a pipeline configuration.
     *
     * <p>Registers three string-typed options: {@code trainData} (required),
     * {@code numParts} (required) and {@code n} (optional). Parsing starts from a config
     * built with the case-class defaults and the result is unwrapped with {@code get()}.
     *
     * @param strArr raw command-line arguments
     * @return the populated configuration
     */
    /* JADX WARN: Type inference failed for: r0v0, types: [pipelines.nlp.StupidBackoffPipeline$$anon$1] */
    public StupidBackoffPipeline.StupidBackoffConfig parse(String[] strArr) {
        return (StupidBackoffPipeline.StupidBackoffConfig) new OptionParser<StupidBackoffPipeline.StupidBackoffConfig>() { // from class: pipelines.nlp.StupidBackoffPipeline$$anon$1
            {
                // NOTE(review): the result of this call is discarded; it may be the
                // program-name argument of the OptionParser super-constructor that the
                // decompiler failed to attach -- confirm against the original source.
                StupidBackoffPipeline$.MODULE$.appName();
                head(Predef$.MODULE$.wrapRefArray(new String[]{StupidBackoffPipeline$.MODULE$.appName(), "0.1"}));
                // Each action closure is a separately compiled class not visible here;
                // presumably it copies the parsed value into the config -- verify.
                opt("trainData", Read$.MODULE$.stringRead()).required().action(new StupidBackoffPipeline$$anon$1$$anonfun$1(this));
                opt("numParts", Read$.MODULE$.stringRead()).required().action(new StupidBackoffPipeline$$anon$1$$anonfun$2(this));
                opt("n", Read$.MODULE$.stringRead()).optional().action(new StupidBackoffPipeline$$anon$1$$anonfun$3(this));
            }
        }.parse(Predef$.MODULE$.wrapRefArray(strArr), new StupidBackoffPipeline.StupidBackoffConfig(StupidBackoffPipeline$StupidBackoffConfig$.MODULE$.apply$default$1(), StupidBackoffPipeline$StupidBackoffConfig$.MODULE$.apply$default$2(), StupidBackoffPipeline$StupidBackoffConfig$.MODULE$.apply$default$3())).get();
    }

    /**
     * Entry point: runs the pipeline and prints summary statistics.
     *
     * <p>Steps, in order: parse the arguments; create a {@link SparkContext} whose master
     * falls back to {@code local[4]} when {@code spark.master} is unset; read
     * {@code trainData} as text using {@code numParts} partitions and tokenize it; fit a
     * word-frequency encoder; featurize n-grams for orders 2 through {@code n}; count them
     * in {@code NoAdd} mode; fit the stupid-backoff estimator; print the token count,
     * vocabulary size and n-gram count; then hand the first 100 scored n-grams to a
     * per-element closure.
     *
     * <p>The Spark context is stopped in a {@code finally} block so that a failure in any
     * pipeline stage does not leave it running.
     *
     * @param strArr raw command-line arguments
     */
    /* JADX WARN: Type inference failed for: r0v10, types: [nodes.nlp.WordFrequencyTransformer] */
    public void main(String[] strArr) {
        StupidBackoffPipeline.StupidBackoffConfig config = parse(strArr);
        SparkConf conf = new SparkConf().setAppName(appName());
        // Only applies when the launcher has not already provided a master URL.
        conf.setIfMissing("spark.master", "local[4]");
        SparkContext sparkContext = new SparkContext(conf);
        try {
            RDD<Seq<String>> tokenized = new Tokenizer(Tokenizer$.MODULE$.apply$default$1()).apply(sparkContext.textFile(config.trainData(), config.numParts()));
            // "??" is the decompiler's placeholder for a type it could not emit; per the
            // JADX warning above, the inferred type is nodes.nlp.WordFrequencyTransformer.
            ?? encoder = WordFrequencyEncoder$.MODULE$.fit2(tokenized);
            StupidBackoffModel<Object> model = new StupidBackoffEstimator$mcI$sp(
                    encoder.unigramCounts(),
                    StupidBackoffEstimator$.MODULE$.apply$default$2(),
                    ClassTag$.MODULE$.Int())
                .fit$mcI$sp(new NGramsCounts(NGramsCountsMode$.MODULE$.NoAdd(), ClassTag$.MODULE$.Int())
                    .apply(encoder.then(
                            new NGramsFeaturizer$mcI$sp(
                                RichInt$.MODULE$.to$extension0(Predef$.MODULE$.intWrapper(2), config.n()),
                                ClassTag$.MODULE$.Int()),
                            ClassTag$.MODULE$.apply(Seq.class))
                        .apply((RDD) tokenized)));
            // scoresRDD() is used twice below (count() and take(100)), so it is cached first.
            model.scoresRDD().cache();
            Predef$.MODULE$.println(new StringOps(Predef$.MODULE$.augmentString(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"|number of tokens: ", "\n          |size of vocabulary: ", "\n          |number of ngrams: ", "\n          |"})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToInteger(model.numTokens()), BoxesRunTime.boxToInteger(model.unigramCounts().size()), BoxesRunTime.boxToLong(model.scoresRDD().count())})))).stripMargin());
            Predef$.MODULE$.println("trained scores of 100 ngrams in the corpus:");
            // The per-element closure is a separate class not visible here; presumably it
            // prints each scored n-gram -- verify.
            Predef$.MODULE$.refArrayOps((Object[]) model.scoresRDD().take(100)).foreach(new StupidBackoffPipeline$$anonfun$main$1());
        } finally {
            // Always release the Spark context, including when a stage above throws.
            sparkContext.stop();
        }
    }

    /** Private: the only instance is {@link #MODULE$}. */
    private StupidBackoffPipeline$() {
        this.appName = "StupidBackoffPipeline";
    }
}
