package net.sansa_stack.ml.flink.clustering;

import java.io.StringWriter;
import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.operators.DataSink;
import org.apache.flink.api.scala.DataSet;
import org.apache.flink.api.scala.ExecutionEnvironment;
import org.apache.flink.core.fs.FileSystem;
import scala.MatchError;
import scala.Predef$;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.IterableLike;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.mutable.StringBuilder;
import scala.reflect.ClassTag$;
import scala.runtime.BooleanRef;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.ObjectRef;
import scala.runtime.ScalaRunTime$;
import scala.util.control.Breaks$;

/* compiled from: RDFByModularityClustering.scala */
/* loaded from: input_file:net/sansa_stack/ml/flink/clustering/RDFByModularityClustering$.class */
/**
 * Singleton holder (Scala {@code object}) for a modularity-based clustering job over a
 * text file of graph edges, executed with the Flink Scala DataSet API.
 *
 * <p>The per-record logic lives in the generated {@code RDFByModularityClustering$$anonfun$N}
 * / {@code $$anon$N} classes, which are not defined in this file; comments below describe
 * only what this class itself does with them.
 */
public final class RDFByModularityClustering$ {
    /** The single instance; created eagerly when the class is initialised. */
    public static final RDFByModularityClustering$ MODULE$ = new RDFByModularityClustering$();

    /**
     * Reads the input file, runs the clustering loop and writes the textual result.
     *
     * <p>Progress information (node/edge counts, the first ten collected edge tuples and
     * the final clusters) is printed to standard output.
     *
     * @param executionEnvironment Flink environment used to read the input and build data sets
     * @param i                    value handed to the iteration closure
     *                             (presumably the maximum number of iterations; confirm against caller)
     * @param str                  path of the text file to read
     * @param str2                 path the result is written to (existing files are overwritten)
     * @return the sink created by {@link #WriteToFile}, configured with parallelism 1
     */
    public DataSink<String> apply(ExecutionEnvironment executionEnvironment, int i, String str, String str2) {
        DataSet lines = executionEnvironment.readTextFile(str, executionEnvironment.readTextFile$default$2());
        DataSet nodes = lines
                .map(new RDFByModularityClustering$$anonfun$1(), BasicArrayTypeInfo.getInfoFor(String[].class), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(String.class)))
                .flatMap(new RDFByModularityClustering$$anonfun$2(), BasicTypeInfo.getInfoFor(String.class), ClassTag$.MODULE$.apply(String.class))
                .distinct();
        long edgeCount = lines.count();
        // Kept as long: narrowing to int would print a wrong number for very large graphs.
        long nodeCount = nodes.count();
        DataSet edges = lines
                .map(new RDFByModularityClustering$$anonfun$3(), BasicArrayTypeInfo.getInfoFor(String[].class), ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(String.class)))
                .map(new RDFByModularityClustering$$anonfun$4(), new RDFByModularityClustering$$anon$9(), ClassTag$.MODULE$.apply(Tuple2.class));
        Predef$.MODULE$.println(new StringBuilder().append("The number of nodes in the knowledge graph is ").append(BoxesRunTime.boxToLong(nodeCount)).append(" and the number of edges is ").append(BoxesRunTime.boxToLong(edgeCount)).append(".").toString());

        // Every collect() runs a Flink job, so the edge tuples are fetched once and reused
        // both for the preview print-out and for the array handed to the iteration.
        Seq collectedEdges = edges.collect();
        ((IterableLike) collectedEdges.take(10)).foreach(new RDFByModularityClustering$$anonfun$apply$4());

        String[] flattenedEdges = (String[]) edges
                .flatMap(new RDFByModularityClustering$$anonfun$5(), BasicTypeInfo.getInfoFor(String.class), ClassTag$.MODULE$.apply(String.class))
                .collect()
                .toArray(ClassTag$.MODULE$.apply(String.class));
        // One tuple per distinct node, turned into a map (value semantics are defined by $$anonfun$6).
        Map nodeMap = nodes
                .map(new RDFByModularityClustering$$anonfun$6(flattenedEdges), new RDFByModularityClustering$$anon$10(), ClassTag$.MODULE$.apply(Tuple2.class))
                .collect()
                .toMap(Predef$.MODULE$.$conforms());
        Tuple2[] edgeArray = (Tuple2[]) collectedEdges.toArray(ClassTag$.MODULE$.apply(Tuple2.class));

        // Mutable holder for the current DataSet of clusters; the iteration closure updates it.
        ObjectRef clusters = ObjectRef.create(nodes.map(new RDFByModularityClustering$$anonfun$7(), new RDFByModularityClustering$$anon$36(BasicTypeInfo.getInfoFor(String.class)), ClassTag$.MODULE$.apply(List.class)));
        Predef$.MODULE$.println("Starting iteration");
        Predef$.MODULE$.println();
        Breaks$.MODULE$.breakable(new RDFByModularityClustering$$anonfun$apply$1(executionEnvironment, i, edgeCount, nodeMap, edgeArray, clusters, BooleanRef.create(false)));

        Seq finalClusters = ((DataSet) clusters.elem).collect();
        StringWriter output = new StringWriter();
        IntRef counter = IntRef.create(0);
        Predef$.MODULE$.println("The computed clusters are:");
        finalClusters.foreach(new RDFByModularityClustering$$anonfun$apply$6(output, counter));
        // The whole report is written as a single string element with parallelism 1.
        return WriteToFile(
                executionEnvironment.fromCollection(Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{output.toString()})), ClassTag$.MODULE$.apply(String.class), BasicTypeInfo.getInfoFor(String.class)),
                str2,
                new Tuple2<Object, Object>(BoxesRunTime.boxToBoolean(true), BoxesRunTime.boxToInteger(1)));
    }

    /**
     * Performs one merge step on the current clustering.
     *
     * <p>All pairs of clusters are formed with {@code cross}, filtered, scored and reduced;
     * the first resulting {@code ((clusterA, clusterB), score)} entry is taken. If its score
     * is negative the clustering is returned unchanged with the flag {@code true}; otherwise
     * the two clusters are removed (via the generated filter) and replaced by their union,
     * with the flag {@code false}.
     *
     * <p>If no candidate pair survives the filters, the clustering is returned unchanged with
     * the flag {@code true} instead of failing with {@code NoSuchElementException}.
     *
     * @param j                    value forwarded to the scoring function
     *                             (presumably the number of edges; confirm against caller)
     * @param tuple2Arr            edge tuples used by the pair filter and the scoring function
     * @param map                  per-node map forwarded to the scoring function
     * @param dataSet              current clusters, each a list of node names
     * @param executionEnvironment environment used to create the data set holding the merged cluster
     * @return the (possibly updated) clusters and a boxed boolean that is {@code true} when
     *         no merge was performed
     * @throws MatchError if the selected entry or its cluster pair is {@code null}
     */
    public Tuple2<DataSet<List<String>>, Object> iterationStepClusteringRDFByModularity(long j, Tuple2<String, String>[] tuple2Arr, Map<String, Object> map, DataSet<List<String>> dataSet, ExecutionEnvironment executionEnvironment) {
        Seq candidates = dataSet.cross(dataSet)
                .filter(new RDFByModularityClustering$$anonfun$8())
                .filter(new RDFByModularityClustering$$anonfun$9(tuple2Arr))
                .map(new RDFByModularityClustering$$anonfun$10(j, tuple2Arr, map), new RDFByModularityClustering$$anon$13(), ClassTag$.MODULE$.apply(Tuple2.class))
                .groupBy(Predef$.MODULE$.wrapIntArray(new int[]{0}))
                .sortGroup(1, Order.ASCENDING)
                .reduceGroup(new RDFByModularityClustering$$anonfun$11(), new RDFByModularityClustering$$anon$24(), ClassTag$.MODULE$.apply(Tuple2.class))
                .collect();
        if (candidates.isEmpty()) {
            // Nothing left to merge: report "finished" rather than calling head() on an empty Seq.
            return new Tuple2<>(dataSet, BoxesRunTime.boxToBoolean(true));
        }

        Tuple2 best = (Tuple2) candidates.head();
        if (best == null) {
            throw new MatchError(best);
        }
        Tuple2 clusterPair = (Tuple2) best._1();
        double score = best._2$mcD$sp();
        if (clusterPair == null) {
            throw new MatchError(best);
        }
        List first = (List) clusterPair._1();
        List second = (List) clusterPair._2();

        if (score < 0.0d) {
            return new Tuple2<>(dataSet, BoxesRunTime.boxToBoolean(true));
        }
        List merged = (List) first.union(second, List$.MODULE$.canBuildFrom());
        return new Tuple2<>(
                dataSet.filter(new RDFByModularityClustering$$anonfun$iterationStepClusteringRDFByModularity$1(first, second))
                        .union(executionEnvironment.fromCollection(List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new List[]{merged})), ClassTag$.MODULE$.apply(List.class), new RDFByModularityClustering$$anon$54(BasicTypeInfo.getInfoFor(String.class)))),
                BoxesRunTime.boxToBoolean(false));
    }

    /**
     * Computes the score for merging two clusters: the per-element values produced by the
     * generated functions for {@code list} x {@code list2} are folded (starting at 0.0) and
     * the result is multiplied by {@code 1.0 / j}.
     *
     * @param j         divisor of the scaling factor; also forwarded to the per-element function
     * @param map       per-node map forwarded to the per-element function
     * @param tuple2Arr edge tuples forwarded to the per-element function
     * @param list      first cluster
     * @param list2     second cluster
     * @return the scaled, folded value
     */
    public double deltaQ(long j, Map<String, Object> map, Tuple2<String, String>[] tuple2Arr, List<String> list, List<String> list2) {
        List combined = (List) list.flatMap(new RDFByModularityClustering$$anonfun$12(list2), List$.MODULE$.canBuildFrom());
        List contributions = (List) combined.map(new RDFByModularityClustering$$anonfun$13(j, map, tuple2Arr), List$.MODULE$.canBuildFrom());
        Object total = contributions.fold(BoxesRunTime.boxToDouble(0.0d), new RDFByModularityClustering$$anonfun$deltaQ$1());
        return (1.0d / j) * BoxesRunTime.unboxToDouble(total);
    }

    /**
     * Writes a data set as text to {@code str}, overwriting any existing file.
     *
     * @param dataSet data to write
     * @param str     output path
     * @param tuple2  {@code (setParallelism, parallelism)}: when the first component is
     *                {@code true}, the sink's parallelism is set to the second component
     * @param <T>     element type of the data set
     * @return the created sink
     */
    public <T> DataSink<T> WriteToFile(DataSet<T> dataSet, String str, Tuple2<Object, Object> tuple2) {
        // Read the flag first so a null tuple fails before any sink is registered.
        boolean setParallelism = tuple2._1$mcZ$sp();
        DataSink<T> sink = dataSet.writeAsText(str, FileSystem.WriteMode.OVERWRITE);
        return setParallelism ? sink.setParallelism(tuple2._2$mcI$sp()) : sink;
    }

    /**
     * Default for the third parameter of {@link #WriteToFile}: {@code (false, 0)}, i.e. the
     * sink's parallelism is left untouched.
     *
     * @return the default {@code (setParallelism, parallelism)} pair
     */
    public <T> Tuple2<Object, Object> WriteToFile$default$3() {
        return new Tuple2<Object, Object>(BoxesRunTime.boxToBoolean(false), BoxesRunTime.boxToInteger(0));
    }

    /** Not instantiable from outside; use {@link #MODULE$}. */
    private RDFByModularityClustering$() {
    }
}
