package org.gradoop.flink.algorithms.fsm.transactional;

import java.util.Map;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.operators.FilterOperator;
import org.apache.flink.api.java.operators.GroupReduceOperator;
import org.apache.flink.api.java.operators.IterativeDataSet;
import org.apache.flink.api.java.operators.SingleInputUdfOperator;
import org.gradoop.flink.algorithms.fsm.dimspan.config.DIMSpanConstants;
import org.gradoop.flink.algorithms.fsm.transactional.common.FSMConfig;
import org.gradoop.flink.algorithms.fsm.transactional.common.TFSMConstants;
import org.gradoop.flink.algorithms.fsm.transactional.tle.ThinkLikeAnEmbeddingFSMBase;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.CCSSingleEdgeEmbeddings;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.CCSSubgraphDecoder;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.CCSSubgraphOnly;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.CCSWrapInSubgraphEmbeddings;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.CategoryEdgeLabels;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.CategoryFrequent;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.CategoryFrequentAndInteresting;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.CategoryGraphCounts;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.CategoryMinFrequencies;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.CategoryVertexLabels;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.CategoryWithCount;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.IsCharacteristic;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.IsResult;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.LabelOnly;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.MinEdgeCount;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.ToCCSGraph;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.WithoutInfrequentEdgeLabels;
import org.gradoop.flink.algorithms.fsm.transactional.tle.functions.WithoutInfrequentVertexLabels;
import org.gradoop.flink.algorithms.fsm.transactional.tle.pojos.CCSGraph;
import org.gradoop.flink.algorithms.fsm.transactional.tle.tuples.CCSSubgraph;
import org.gradoop.flink.algorithms.fsm.transactional.tle.tuples.CCSSubgraphEmbeddings;
import org.gradoop.flink.model.impl.functions.utils.First;
import org.gradoop.flink.model.impl.layouts.transactional.tuples.GraphTransaction;

/* loaded from: input_file:org/gradoop/flink/algorithms/fsm/transactional/CategoryCharacteristicSubgraphs.class */
/**
 * Transactional FSM dataflow built on {@link ThinkLikeAnEmbeddingFSMBase} that works on
 * {@link CCSGraph} inputs and emits the subgraphs accepted by {@link IsCharacteristic}.
 *
 * <p>Frequency thresholds are kept per category: one broadcast map holds the counts produced
 * by {@link CategoryGraphCounts}, a second one the thresholds derived from it by
 * {@link CategoryMinFrequencies}.
 */
public class CategoryCharacteristicSubgraphs extends ThinkLikeAnEmbeddingFSMBase<CCSGraph, CCSSubgraph, CCSSubgraphEmbeddings> {

    /**
     * The literal {@code "_category"}. Not referenced inside this class.
     * NOTE(review): presumably the property key under which a graph's category is stored -
     * confirm against the conversion functions that consume it.
     */
    public static final String CATEGORY_KEY = "_category";

    /** Single-element data set: output of {@link CategoryGraphCounts}; broadcast as GRAPH_COUNT. */
    private DataSet<Map<String, Long>> categoryCounts;

    /** Single-element data set: output of {@link CategoryMinFrequencies}; broadcast as MIN_FREQUENCY. */
    private DataSet<Map<String, Long>> categoryMinFrequencies;

    /** Threshold handed unchanged to {@link CategoryFrequentAndInteresting}. */
    private final float minInterestingness;

    /**
     * Creates the operator.
     *
     * @param fSMConfig FSM configuration, forwarded to the base class
     * @param f         minimum interestingness threshold, stored in {@code minInterestingness}
     */
    public CategoryCharacteristicSubgraphs(FSMConfig fSMConfig, float f) {
        super(fSMConfig);
        this.minInterestingness = f;
    }

    /**
     * Builds the complete dataflow.
     *
     * <ol>
     *   <li>Map every transaction with {@link ToCCSGraph}.</li>
     *   <li>Derive the per-category counts and minimum frequencies.</li>
     *   <li>Optionally (if pre-processing is enabled in the config) drop infrequent labels.</li>
     *   <li>Run a bulk iteration of at most {@code fsmConfig.getMaxEdgeCount()} rounds,
     *       seeded by {@link CCSSingleEdgeEmbeddings}.</li>
     *   <li>Keep the entries accepted by {@code IsResult(true)}, reduce them with
     *       {@link CCSSubgraphOnly}, apply {@link MinEdgeCount} when the configured minimum
     *       edge count exceeds 1, and decode with {@link CCSSubgraphDecoder}.</li>
     * </ol>
     *
     * @param dataSet input graph transactions
     * @return decoded result transactions
     */
    @Override // declared in org.gradoop.flink.algorithms.fsm.transactional.tle.TransactionalFSMBase
    public DataSet<GraphTransaction> execute(DataSet<GraphTransaction> dataSet) {
        DataSet<CCSGraph> graphs = dataSet.map(new ToCCSGraph());

        // Both broadcast maps must exist before any operator that references them is created.
        setCategoryCounts(graphs);
        setMinFrequencies();

        if (fsmConfig.isPreprocessingEnabled()) {
            graphs = preProcessCategories(graphs);
        }

        // ---- iteration head ----
        IterativeDataSet iteration = graphs
            .flatMap(new CCSSingleEdgeEmbeddings(fsmConfig))
            .iterate(fsmConfig.getMaxEdgeCount());

        // ---- iteration body ----
        // Entries rejected by IsResult(true), i.e. those still to be processed this round.
        FilterOperator pendingEmbeddings = iteration.filter(new IsResult(false));

        DataSet<CCSSubgraph> categoryFrequent = getCategoryFrequentSubgraphs(pendingEmbeddings);

        // One representative element per value of field 0.
        GroupReduceOperator firstPerKey = categoryFrequent.groupBy(0).reduceGroup(new First());

        DataSet<CCSSubgraphEmbeddings> grownEmbeddings = growEmbeddingsOfFrequentSubgraphs(
            filterByFrequentSubgraphs(pendingEmbeddings, firstPerKey), firstPerKey);

        // Next partial solution = results carried over + newly found characteristic
        // subgraphs (wrapped as embeddings) + the freshly grown embeddings.
        FilterOperator carriedResults = iteration.filter(new IsResult(true));
        DataSet wrappedCharacteristic = getCharacteristicSubgraphs(categoryFrequent)
            .map(new CCSWrapInSubgraphEmbeddings());
        DataSet nextPartialSolution = carriedResults
            .union(wrappedCharacteristic)
            .union(grownEmbeddings);

        // ---- iteration footer ----
        // grownEmbeddings doubles as the termination criterion passed to closeWith.
        DataSet subgraphs = iteration
            .closeWith(nextPartialSolution, grownEmbeddings)
            .filter(new IsResult(true))
            .map(new CCSSubgraphOnly());

        if (fsmConfig.getMinEdgeCount() > 1) {
            subgraphs = subgraphs.filter(new MinEdgeCount(fsmConfig));
        }

        return subgraphs.map(new CCSSubgraphDecoder(config));
    }

    /**
     * Populates {@code categoryCounts}: maps each graph with {@link CategoryWithCount}, sums
     * field 1 grouped by field 0 and folds the result with {@link CategoryGraphCounts}.
     *
     * @param graphs graphs to count
     */
    private void setCategoryCounts(DataSet<CCSGraph> graphs) {
        categoryCounts = graphs
            .map(new CategoryWithCount())
            .groupBy(0)
            .sum(1)
            .reduceGroup(new CategoryGraphCounts());
    }

    /**
     * Populates {@code categoryMinFrequencies} by applying {@link CategoryMinFrequencies}
     * to {@code categoryCounts}; therefore has to run after {@code setCategoryCounts}.
     */
    private void setMinFrequencies() {
        categoryMinFrequencies = categoryCounts.map(new CategoryMinFrequencies(fsmConfig));
    }

    /**
     * Two-stage label pre-processing: first the vertex-label stage, then - on its output -
     * the edge-label stage. In each stage the labels accepted by {@link CategoryFrequent}
     * are reduced with {@link LabelOnly}, de-duplicated and broadcast to the pruning mapper.
     *
     * @param graphs graphs to pre-process
     * @return output of the edge-label stage
     */
    private DataSet<CCSGraph> preProcessCategories(DataSet<CCSGraph> graphs) {
        // Stage 1: vertex labels.
        SingleInputUdfOperator vertexPruner = graphs.map(new WithoutInfrequentVertexLabels());
        DataSet frequentVertexLabels = graphs
            .flatMap(new CategoryVertexLabels())
            .groupBy(0, 1)
            .sum(2)
            .filter(new CategoryFrequent())
            .withBroadcastSet(categoryMinFrequencies, DIMSpanConstants.MIN_FREQUENCY)
            .map(new LabelOnly())
            .distinct();
        SingleInputUdfOperator vertexPruned =
            vertexPruner.withBroadcastSet(frequentVertexLabels, TFSMConstants.FREQUENT_VERTEX_LABELS);

        // Stage 2: edge labels, computed on the output of stage 1.
        SingleInputUdfOperator edgePruner = vertexPruned.map(new WithoutInfrequentEdgeLabels());
        DataSet frequentEdgeLabels = vertexPruned
            .flatMap(new CategoryEdgeLabels())
            .groupBy(0, 1)
            .sum(2)
            .filter(new CategoryFrequent())
            .withBroadcastSet(categoryMinFrequencies, DIMSpanConstants.MIN_FREQUENCY)
            .map(new LabelOnly())
            .distinct();
        return edgePruner.withBroadcastSet(frequentEdgeLabels, TFSMConstants.FREQUENT_EDGE_LABELS);
    }

    /**
     * Keeps only the subgraphs accepted by {@link IsCharacteristic}.
     *
     * @param subgraphs candidate subgraphs
     * @return the filtered data set
     */
    private FilterOperator<CCSSubgraph> getCharacteristicSubgraphs(DataSet<CCSSubgraph> subgraphs) {
        return subgraphs.filter(new IsCharacteristic());
    }

    /**
     * Reduces embeddings to subgraphs, sums field 1 per (field 0, field 3) pair, then
     * regroups by field 0 and lets {@link CategoryFrequentAndInteresting} choose the output,
     * with both per-category maps available as broadcast sets.
     *
     * @param embeddings embeddings of the current iteration round
     * @return subgraphs emitted by {@link CategoryFrequentAndInteresting}
     */
    private DataSet<CCSSubgraph> getCategoryFrequentSubgraphs(DataSet<CCSSubgraphEmbeddings> embeddings) {
        return embeddings
            .map(new CCSSubgraphOnly())
            .groupBy(0, 3)
            .sum(1)
            .groupBy(0)
            .reduceGroup(new CategoryFrequentAndInteresting(minInterestingness))
            .withBroadcastSet(categoryCounts, TFSMConstants.GRAPH_COUNT)
            .withBroadcastSet(categoryMinFrequencies, DIMSpanConstants.MIN_FREQUENCY);
    }
}
