package org.danilopianini.plagiarismdetector.core.filter.technique.tokenization;

import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import org.danilopianini.plagiarismdetector.core.analyzer.representation.TokenizedSource;
import org.danilopianini.plagiarismdetector.core.analyzer.representation.token.Token;
import org.danilopianini.plagiarismdetector.core.analyzer.representation.token.TokenType;
import org.danilopianini.plagiarismdetector.core.filter.RepresentationFilter;
import org.danilopianini.plagiarismdetector.core.filter.indexer.technique.tokenization.TokenBasedIndexer;
import org.jetbrains.annotations.NotNull;

/* compiled from: TokenizedSourceFilter.kt */
@Metadata(mv = {1, 9, 0}, k = 1, xi = 48, d1 = {"��>\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0010\u0006\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010$\n\u0002\u0018\u0002\n\u0002\u0010\b\n\u0002\b\u0005\n\u0002\u0010\u001e\n\u0002\b\u0002\u0018��2\u0014\u0012\u0004\u0012\u00020\u0002\u0012\n\u0012\b\u0012\u0004\u0012\u00020\u00040\u00030\u0001B\r\u0012\u0006\u0010\u0005\u001a\u00020\u0006¢\u0006\u0002\u0010\u0007J0\u0010\n\u001a\u00020\u00062\u0012\u0010\u000b\u001a\u000e\u0012\u0004\u0012\u00020\r\u0012\u0004\u0012\u00020\u000e0\f2\u0012\u0010\u000f\u001a\u000e\u0012\u0004\u0012\u00020\r\u0012\u0004\u0012\u00020\u000e0\fH\u0002J%\u0010\u0010\u001a\b\u0012\u0004\u0012\u00020\u00020\u00032\u0006\u0010\u0011\u001a\u00020\u00022\f\u0010\u0012\u001a\b\u0012\u0004\u0012\u00020\u00020\u0003H\u0096\u0002J\u0012\u0010\u0013\u001a\u00020\u0006*\b\u0012\u0004\u0012\u00020\u000e0\u0014H\u0002J\f\u0010\u0015\u001a\u00020\u0006*\u00020\u000eH\u0002R\u000e\u0010\b\u001a\u00020\tX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0005\u001a\u00020\u0006X\u0082\u0004¢\u0006\u0002\n��¨\u0006\u0016"}, d2 = {"Lorg/danilopianini/plagiarismdetector/core/filter/technique/tokenization/TokenizedSourceFilter;", "Lorg/danilopianini/plagiarismdetector/core/filter/RepresentationFilter;", "Lorg/danilopianini/plagiarismdetector/core/analyzer/representation/TokenizedSource;", "Lkotlin/sequences/Sequence;", "Lorg/danilopianini/plagiarismdetector/core/analyzer/representation/token/Token;", "threshold", "", "(D)V", "indexer", "Lorg/danilopianini/plagiarismdetector/core/filter/indexer/technique/tokenization/TokenBasedIndexer;", "cosineSimilarityOf", "index1", "", "Lorg/danilopianini/plagiarismdetector/core/analyzer/representation/token/TokenType;", "", "index2", "invoke", "submission", "corpus", "norm", "", "squared", "code-plagiarism-detector"})
@SourceDebugExtension({"SMAP\nTokenizedSourceFilter.kt\nKotlin\n*S Kotlin\n*F\n+ 1 TokenizedSourceFilter.kt\norg/danilopianini/plagiarismdetector/core/filter/technique/tokenization/TokenizedSourceFilter\n+ 2 _Sequences.kt\nkotlin/sequences/SequencesKt___SequencesKt\n+ 3 Maps.kt\nkotlin/collections/MapsKt__MapsKt\n+ 4 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 5 fake.kt\nkotlin/jvm/internal/FakeKt\n*L\n1#1,44:1\n759#2,2:45\n775#2,4:47\n453#3:51\n403#3:52\n1238#4,4:53\n1#5:57\n*S KotlinDebug\n*F\n+ 1 TokenizedSourceFilter.kt\norg/danilopianini/plagiarismdetector/core/filter/technique/tokenization/TokenizedSourceFilter\n*L\n25#1:45,2\n25#1:47,4\n26#1:51\n26#1:52\n26#1:53,4\n*E\n"})
/* loaded from: input_file:org/danilopianini/plagiarismdetector/core/filter/technique/tokenization/TokenizedSourceFilter.class */
/*
 * Filters a corpus of tokenized sources, keeping only those whose token-type
 * frequency index is sufficiently similar (by cosine similarity) to the index
 * of a given submission.
 *
 * The cut-off is relative to the similarities observed in the corpus:
 * cutoff = min + threshold * (max - min). Entries whose similarity is greater
 * than or equal to the cut-off are retained.
 * NOTE(review): threshold is presumably meant to lie in [0, 1] - confirm with callers.
 */
public final class TokenizedSourceFilter implements RepresentationFilter<TokenizedSource, Sequence<? extends Token>> {
    /** Fraction of the observed [min, max] similarity range used to place the cut-off. */
    private final double threshold;

    /** Builds, for a tokenized source, the map from token type to a per-type integer used as a vector component. */
    @NotNull
    private final TokenBasedIndexer indexer = new TokenBasedIndexer();

    /**
     * Creates a filter with the given relative threshold.
     *
     * @param d the threshold applied to the observed similarity range
     */
    public TokenizedSourceFilter(double d) {
        this.threshold = d;
    }

    /**
     * Selects the corpus entries that are similar enough to the submission.
     *
     * @param tokenizedSource the submission to compare the corpus against
     * @param sequence the corpus of candidate sources
     * @return a lazy sequence of the corpus entries whose cosine similarity with the
     *         submission is at least {@code min + threshold * (max - min)}; empty when
     *         the corpus is empty
     */
    @NotNull
    public Sequence<TokenizedSource> invoke(@NotNull TokenizedSource tokenizedSource, @NotNull Sequence<? extends TokenizedSource> sequence) {
        Intrinsics.checkNotNullParameter(tokenizedSource, "submission");
        Intrinsics.checkNotNullParameter(sequence, "corpus");
        // The submission index does not depend on the corpus entry: compute it once.
        final Map<TokenType, Integer> submissionIndex = this.indexer.invoke(tokenizedSource);
        // LinkedHashMap keeps the corpus iteration order in the returned sequence.
        final Map<TokenizedSource, Double> similarities = new LinkedHashMap<>();
        final Iterator<? extends TokenizedSource> corpusIterator = sequence.iterator();
        while (corpusIterator.hasNext()) {
            final TokenizedSource candidate = corpusIterator.next();
            final Map<TokenType, Integer> candidateIndex = this.indexer.invoke(candidate);
            similarities.put(candidate, Double.valueOf(cosineSimilarityOf(submissionIndex, candidateIndex)));
        }
        if (similarities.isEmpty()) {
            // Nothing to filter: there is no min/max to derive a cut-off from.
            return SequencesKt.emptySequence();
        }
        double lowest = Double.POSITIVE_INFINITY;
        double highest = Double.NEGATIVE_INFINITY;
        for (Double similarity : similarities.values()) {
            lowest = Math.min(lowest, similarity.doubleValue());
            highest = Math.max(highest, similarity.doubleValue());
        }
        final double cutoff = lowest + (this.threshold * (highest - lowest));
        final Sequence<Map.Entry<TokenizedSource, Double>> retained = SequencesKt.filter(
            MapsKt.asSequence(similarities),
            new Function1<Map.Entry<TokenizedSource, Double>, Boolean>() {
                @Override
                @NotNull
                public Boolean invoke(@NotNull Map.Entry<TokenizedSource, Double> entry) {
                    Intrinsics.checkNotNullParameter(entry, "it");
                    return Boolean.valueOf(entry.getValue().doubleValue() >= cutoff);
                }
            }
        );
        return SequencesKt.map(retained, new Function1<Map.Entry<TokenizedSource, Double>, TokenizedSource>() {
            @Override
            @NotNull
            public TokenizedSource invoke(@NotNull Map.Entry<TokenizedSource, Double> entry) {
                Intrinsics.checkNotNullParameter(entry, "it");
                return entry.getKey();
            }
        });
    }

    /**
     * Computes the cosine similarity between two token-type indexes, treating each map
     * as a sparse vector keyed by token type.
     *
     * @param index1 the first index
     * @param index2 the second index
     * @return the dot product divided by the product of the Euclidean norms, or
     *         {@code 0.0} when either vector has zero norm (the ratio would be NaN)
     */
    private final double cosineSimilarityOf(Map<TokenType, Integer> index1, Map<TokenType, Integer> index2) {
        final double denominator = norm(index1.values()) * norm(index2.values());
        if (denominator == 0.0d) {
            // A zero vector has no direction; avoid 0/0 = NaN poisoning the min/max cut-off.
            return 0.0d;
        }
        // Accumulate in long: the product of two int counts can exceed the int range.
        long dotProduct = 0L;
        for (Map.Entry<TokenType, Integer> entry : index1.entrySet()) {
            final Integer own = entry.getValue();
            final Integer other = index2.get(entry.getKey());
            // Token types missing from either index contribute nothing to the dot product.
            if (own != null && other != null) {
                dotProduct += (long) own.intValue() * other.intValue();
            }
        }
        return dotProduct / denominator;
    }

    /**
     * Computes the Euclidean norm of the given values.
     *
     * @param collection the vector components
     * @return the square root of the sum of the squared components
     */
    private final double norm(Collection<Integer> collection) {
        double sumOfSquares = 0.0d;
        for (Integer component : collection) {
            sumOfSquares += squared(component.intValue());
        }
        return Math.sqrt(sumOfSquares);
    }

    /**
     * Squares an integer, returning the result as a double.
     *
     * @param i the value to square
     * @return {@code i} raised to the power of two
     */
    private final double squared(int i) {
        return Math.pow(i, 2);
    }
}
