package com.apple.foundationdb.record.provider.common.text;

import com.apple.foundationdb.record.metadata.IndexTypes;
import com.apple.foundationdb.record.provider.common.text.TextTokenizer;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.text.DateFormat;
import com.ibm.icu.text.PluralRules;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Spliterators;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import javax.annotation.Nonnull;
import org.apache.logging.log4j.core.jackson.JsonConstants;
import org.jline.builtins.TTop;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Named;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

/* loaded from: input_file:com/apple/foundationdb/record/provider/common/text/TextTokenizerTest.class */
public class TextTokenizerTest {
    /**
     * Expected token lists, one per sample text, in the same order as {@code TextSamples.ALL}
     * (the two are paired positionally by {@code test(...)}).
     *
     * <p>Used as the expectation for {@code DefaultTextTokenizer} (in both parameter sources) and for
     * {@code UniqueTokenLimitTextTokenizer} at version 0 in {@code compatibility()}.
     *
     * <p>Tokens are written as plain literals so that this fixture does not depend on the values of
     * unrelated third-party constants.
     *
     * <p>NOTE(review): the Hangul entries may be stored in decomposed (jamo) form; preserve their
     * exact code points when editing.
     */
    private static final List<List<String>> EXPECTED_DEFAULT_SAMPLE_TOKENS = List.of(
            List.of("the", "angstrom", "unit", "a", "was", "named", "after", "anders", "angstrom"),
            List.of("according", "to", "the", "encyclopædia", "æthelred", "the", "unræd", "was", "king", "from", "966", "to", "1016"),
            List.of("hello", "there"),
            List.of(TextSamples.CHINESE_SIMPLIFIED),
            List.of(TextSamples.CHINESE_TRADITIONAL),
            List.of("text", "tokenization", "and", "normalization", "is"),
            List.of("who", "started", "the", "fire"),
            List.of("apres", "deux", "napoleons", "france", "a", "recu", "un", "thiers"),
            List.of("die", "nationalmannschaft", "hat", "die", "weltmeisterschaft", "gewonnen", "horte", "ich", "wahrend", "ich", "die", "friedrichstraße", "hinunterlief"),
            List.of("ολοι", "οι", "ανθρωποι", "ειναι", "θνητοι", "ο", "σωκρατης", "ειναι", "ανθρωπος", "ο", "σωκρατης", "ειναι", "θνητος"),
            List.of("나는", "한국어를", "못해"),
            List.of("נון"),
            List.of("english", "used", "to", "have", "multiple", "versions", "of", "the", "letter", "s"),
            List.of(
                    "two", "households", "both", "alike", "in", "dignity", "in", "fair", "verona", "where", "we", "lay",
                    "our", "scene", "from", "ancient", "grudge", "break", "to", "new", "mutiny", "where", "civil", "blood",
                    "makes", "civil", "hands", "unclean", "from", "forth", "the", "fatal", "loins", "of", "these", "two",
                    "foes", "a", "pair", "of", "star-cross", "d", "lovers", "take", "their", "life", "whose", "misadventur",
                    "d", "piteous", "overthrows", "doth", "with", "their", "death", "bury", "their", "parents", "strife", "the",
                    "fearful", "passage", "of", "their", "death-mark", "d", "love", "and", "the", "continuance", "of", "their",
                    "parents", "rage", "which", "but", "their", "children", "s", "end", "nought", "could", "remove", "is",
                    "now", "the", "two", "hours", "traffic", "of", "our", "stage", "the", "which", "if", "you",
                    "with", "patient", "ears", "attend", "what", "here", "shall", "miss", "our", "toil", "shall", "strive",
                    "to", "mend"),
            List.of("актер", "посетил", "многие", "достопримечательности", "москвы"),
            List.of("లద", "అద", "అరధలనదన"),
            List.of("การสะกดการนตไทยมความซบซอนมาก"),
            List.of("https", "www.example.com", "fake-path", "1932e32ab3efc0014228eadc28219da2", "hm"),
            List.of("א", "שפראך", "איז", "א", "דיאלעקט", "מיט", "אן", "ארמיי", "און", "פלאט"));
    /**
     * Expected token lists, one per sample text (same order as {@code TextSamples.ALL}), for
     * {@code PrefixTextTokenizer} at version 0. Every entry here is at most three characters long.
     *
     * <p>Tokens are written as plain literals so that this fixture does not depend on the values of
     * unrelated third-party constants.
     *
     * <p>NOTE(review): the Hangul entries contain bare conjoining jamo, so they look like prefixes
     * of decomposed text; preserve their exact code points when editing.
     */
    private static final List<List<String>> EXPECTED_PREFIX_V0_SAMPLE_TOKENS = List.of(
            List.of("the", "ang", "uni", "a", "was", "nam", "aft", "and", "ang"),
            List.of("acc", "to", "the", "enc", "æth", "the", "unr", "was", "kin", "fro", "966", "to", "101"),
            List.of("hel", "the"),
            List.of("苹果园"),
            List.of("蘋果園"),
            List.of("tex", "tok", "and", "nor", "is"),
            List.of("who", "sta", "the", "fir"),
            List.of("apr", "deu", "nap", "fra", "a", "rec", "un", "thi"),
            List.of("die", "nat", "hat", "die", "wel", "gew", "hor", "ich", "wah", "ich", "die", "fri", "hin"),
            List.of("ολο", "οι", "ανθ", "ειν", "θνη", "ο", "σωκ", "ειν", "ανθ", "ο", "σωκ", "ειν", "θνη"),
            List.of("나ᄂ", "한", "못"),
            List.of("נון"),
            List.of("eng", "use", "to", "hav", "mul", "ver", "of", "the", "let", "s"),
            List.of(
                    "two", "hou", "bot", "ali", "in", "dig", "in", "fai", "ver", "whe", "we", "lay",
                    "our", "sce", "fro", "anc", "gru", "bre", "to", "new", "mut", "whe", "civ", "blo",
                    "mak", "civ", "han", "unc", "fro", "for", "the", "fat", "loi", "of", "the", "two",
                    "foe", "a", "pai", "of", "sta", "d", "lov", "tak", "the", "lif", "who", "mis",
                    "d", "pit", "ove", "dot", "wit", "the", "dea", "bur", "the", "par", "str", "the",
                    "fea", "pas", "of", "the", "dea", "d", "lov", "and", "the", "con", "of", "the",
                    "par", "rag", "whi", "but", "the", "chi", "s", "end", "nou", "cou", "rem", "is",
                    "now", "the", "two", "hou", "tra", "of", "our", "sta", "the", "whi", "if", "you",
                    "wit", "pat", "ear", "att", "wha", "her", "sha", "mis", "our", "toi", "sha", "str",
                    "to", "men"),
            List.of("акт", "пос", "мно", "дос", "мос"),
            List.of("లద", "అద", "అరధ"),
            List.of("การ"),
            List.of("htt", "www", "fak", "193", "hm"),
            List.of("א", "שפר", "איז", "א", "דיא", "מיט", "אן", "ארמ", "און", "פלא"));
    /**
     * Expected token lists, one per sample text (same order as {@code TextSamples.ALL}), for
     * {@code PrefixTextTokenizer} at version 1. Every literal entry here is at most four characters long.
     *
     * <p>Tokens are written as plain literals so that this fixture does not depend on the values of
     * unrelated third-party constants.
     *
     * <p>NOTE(review): the Hangul entries contain bare conjoining jamo, so they look like prefixes
     * of decomposed text; preserve their exact code points when editing.
     */
    private static final List<List<String>> EXPECTED_PREFIX_V1_SAMPLE_TOKENS = List.of(
            List.of("the", "angs", "unit", "a", "was", "name", "afte", "ande", "angs"),
            List.of("acco", "to", "the", "ency", "æthe", "the", "unræ", "was", "king", "from", "966", "to", "1016"),
            List.of("hell", "ther"),
            List.of(TextSamples.CHINESE_SIMPLIFIED),
            List.of(TextSamples.CHINESE_TRADITIONAL),
            List.of("text", "toke", "and", "norm", "is"),
            List.of("who", "star", "the", "fire"),
            List.of("apre", "deux", "napo", "fran", "a", "recu", "un", "thie"),
            List.of("die", "nati", "hat", "die", "welt", "gewo", "hort", "ich", "wahr", "ich", "die", "frie", "hinu"),
            List.of("ολοι", "οι", "ανθρ", "εινα", "θνητ", "ο", "σωκρ", "εινα", "ανθρ", "ο", "σωκρ", "εινα", "θνητ"),
            List.of("나느", "한ᄀ", "못ᄒ"),
            List.of("נון"),
            List.of("engl", "used", "to", "have", "mult", "vers", "of", "the", "lett", "s"),
            List.of(
                    "two", "hous", "both", "alik", "in", "dign", "in", "fair", "vero", "wher", "we", "lay",
                    "our", "scen", "from", "anci", "grud", "brea", "to", "new", "muti", "wher", "civi", "bloo",
                    "make", "civi", "hand", "uncl", "from", "fort", "the", "fata", "loin", "of", "thes", "two",
                    "foes", "a", "pair", "of", "star", "d", "love", "take", "thei", "life", "whos", "misa",
                    "d", "pite", "over", "doth", "with", "thei", "deat", "bury", "thei", "pare", "stri", "the",
                    "fear", "pass", "of", "thei", "deat", "d", "love", "and", "the", "cont", "of", "thei",
                    "pare", "rage", "whic", "but", "thei", "chil", "s", "end", "noug", "coul", "remo", "is",
                    "now", "the", "two", "hour", "traf", "of", "our", "stag", "the", "whic", "if", "you",
                    "with", "pati", "ears", "atte", "what", "here", "shal", "miss", "our", "toil", "shal", "stri",
                    "to", "mend"),
            List.of("акте", "посе", "мног", "дост", "моск"),
            List.of("లద", "అద", "అరధల"),
            List.of("การส"),
            List.of("http", "www.", "fake", "1932", "hm"),
            List.of("א", "שפרא", "איז", "א", "דיאל", "מיט", "אן", "ארמי", "און", "פלאט"));
    /**
     * Expected token lists, one per sample text (same order as {@code TextSamples.ALL}), for the
     * tokenizer built by {@code filteredTokenizer()}.
     *
     * <p>Compared with the default expectations, the positions of the words "the", "of", "in",
     * "and" and "a" hold the empty string here, so the remaining tokens keep their positions.
     *
     * <p>Tokens are written as plain literals so that this fixture does not depend on the values of
     * unrelated third-party constants.
     *
     * <p>NOTE(review): the Hangul entries may be stored in decomposed (jamo) form; preserve their
     * exact code points when editing.
     */
    private static final List<List<String>> EXPECTED_FILTERED_SAMPLE_TOKENS = List.of(
            List.of("", "angstrom", "unit", "", "was", "named", "after", "anders", "angstrom"),
            List.of("according", "to", "", "encyclopædia", "æthelred", "", "unræd", "was", "king", "from", "966", "to", "1016"),
            List.of("hello", "there"),
            List.of(TextSamples.CHINESE_SIMPLIFIED),
            List.of(TextSamples.CHINESE_TRADITIONAL),
            List.of("text", "tokenization", "", "normalization", "is"),
            List.of("who", "started", "", "fire"),
            List.of("apres", "deux", "napoleons", "france", "", "recu", "un", "thiers"),
            List.of("die", "nationalmannschaft", "hat", "die", "weltmeisterschaft", "gewonnen", "horte", "ich", "wahrend", "ich", "die", "friedrichstraße", "hinunterlief"),
            List.of("ολοι", "οι", "ανθρωποι", "ειναι", "θνητοι", "ο", "σωκρατης", "ειναι", "ανθρωπος", "ο", "σωκρατης", "ειναι", "θνητος"),
            List.of("나는", "한국어를", "못해"),
            List.of("נון"),
            List.of("english", "used", "to", "have", "multiple", "versions", "", "", "letter", "s"),
            List.of(
                    "two", "households", "both", "alike", "", "dignity", "", "fair", "verona", "where", "we", "lay",
                    "our", "scene", "from", "ancient", "grudge", "break", "to", "new", "mutiny", "where", "civil", "blood",
                    "makes", "civil", "hands", "unclean", "from", "forth", "", "fatal", "loins", "", "these", "two",
                    "foes", "", "pair", "", "star-cross", "d", "lovers", "take", "their", "life", "whose", "misadventur",
                    "d", "piteous", "overthrows", "doth", "with", "their", "death", "bury", "their", "parents", "strife", "",
                    "fearful", "passage", "", "their", "death-mark", "d", "love", "", "", "continuance", "", "their",
                    "parents", "rage", "which", "but", "their", "children", "s", "end", "nought", "could", "remove", "is",
                    "now", "", "two", "hours", "traffic", "", "our", "stage", "", "which", "if", "you",
                    "with", "patient", "ears", "attend", "what", "here", "shall", "miss", "our", "toil", "shall", "strive",
                    "to", "mend"),
            List.of("актер", "посетил", "многие", "достопримечательности", "москвы"),
            List.of("లద", "అద", "అరధలనదన"),
            List.of("การสะกดการนตไทยมความซบซอนมาก"),
            List.of("https", "www.example.com", "fake-path", "1932e32ab3efc0014228eadc28219da2", "hm"),
            List.of("א", "שפראך", "איז", "א", "דיאלעקט", "מיט", "אן", "ארמיי", "און", "פלאט"));
    /**
     * Expected token lists, one per sample text (same order as {@code TextSamples.ALL}), used by
     * {@code reconstituted()} for {@code UniqueTokenLimitTextTokenizer} at version 0. Several
     * entries are shorter than their counterparts in the default expectations.
     *
     * <p>Tokens are written as plain literals so that this fixture does not depend on the values of
     * unrelated third-party constants.
     *
     * <p>NOTE(review): the Hangul entries may be stored in decomposed (jamo) form; preserve their
     * exact code points when editing.
     */
    private static final List<List<String>> EXPECTED_RECONSTITUTED_UNIQUE_LIMIT_SAMPLE_TOKENS = List.of(
            List.of("the", "angstrom", "unit", "a", "was"),
            List.of("according", "to", "the", "encyclopædia", "æthelred", "the"),
            List.of("hello", "there"),
            List.of(TextSamples.CHINESE_SIMPLIFIED),
            List.of(TextSamples.CHINESE_TRADITIONAL),
            List.of("text", "tokenization", "and", "normalization", "is"),
            List.of("who", "started", "the", "fire"),
            List.of("apres", "deux", "napoleons", "france", "a"),
            List.of("die", "nationalmannschaft", "hat", "die", "weltmeisterschaft", "gewonnen"),
            List.of("ολοι", "οι", "ανθρωποι", "ειναι", "θνητοι"),
            List.of("나는", "한국어를", "못해"),
            List.of("נון"),
            List.of("english", "used", "to", "have", "multiple"),
            List.of("two", "households", "both", "alike", "in"),
            List.of("актер", "посетил", "многие", "достопримечательности", "москвы"),
            List.of("లద", "అద", "అరధలనదన"),
            List.of("การสะกดการนตไทยมความซบซอนมาก"),
            List.of("https", "www.example.com", "fake-path", "1932e32ab3efc0014228eadc28219da2", "hm"),
            List.of("א", "שפראך", "איז", "א", "דיאלעקט", "מיט"));

    /**
     * Tokenizes {@code str} with the given tokenizer in {@code INDEX} mode.
     *
     * @param textTokenizer tokenizer under test
     * @param str text to tokenize
     * @param i tokenizer version forwarded to {@code tokenizeToList}
     * @return the list produced by {@code tokenizeToList}
     */
    private List<String> tokenList(@Nonnull TextTokenizer textTokenizer, @Nonnull String str, int i) {
        final TextTokenizer.TokenizerMode mode = TextTokenizer.TokenizerMode.INDEX;
        return textTokenizer.tokenizeToList(str, i, mode);
    }

    /**
     * Rebuilds a positional token list from the token-to-positions map returned by
     * {@code tokenizeToMap} in {@code INDEX} mode.
     *
     * <p>The result has one slot per position from 0 up to the largest position found. Positions
     * that no token claims hold the empty string.
     *
     * @param textTokenizer tokenizer under test
     * @param str text to tokenize
     * @param i tokenizer version forwarded to {@code tokenizeToMap}
     * @return fixed-size list of tokens indexed by position
     */
    private List<String> reconstitutedTokenList(@Nonnull TextTokenizer textTokenizer, @Nonnull String str, int i) {
        final Map<String, List<Integer>> positionsByToken =
                textTokenizer.tokenizeToMap(str, i, TextTokenizer.TokenizerMode.INDEX);

        // Only the last position of each list is inspected, i.e. each list is taken to end with
        // its largest position. Folding from 0 keeps the result non-negative.
        final int lastPosition = positionsByToken.values().stream()
                .mapToInt(positions -> positions.get(positions.size() - 1))
                .reduce(0, Math::max);

        final String[] slots = new String[lastPosition + 1];
        Arrays.fill(slots, "");
        positionsByToken.forEach((token, positions) ->
                positions.forEach(position -> slots[position] = token));
        return Arrays.asList(slots);
    }

    /**
     * Combines two collections element by element into a sequential stream.
     *
     * <p>The stream is as long as the larger collection. Once the shorter collection is exhausted,
     * {@code null} is passed to {@code biFunction} in its place.
     *
     * @param collection source of the first argument of each pair
     * @param collection2 source of the second argument of each pair
     * @param biFunction combiner applied to each pair, in iteration order
     * @return stream of the combined results
     */
    private static <T, U, R> Stream<R> zip(Collection<T> collection, Collection<U> collection2, final BiFunction<T, U, R> biFunction) {
        final Iterator<T> left = collection.iterator();
        final Iterator<U> right = collection2.iterator();
        final long pairCount = Math.max(collection.size(), collection2.size());

        final Iterator<R> pairs = new Iterator<R>() {
            @Override
            public boolean hasNext() {
                if (left.hasNext()) {
                    return true;
                }
                return right.hasNext();
            }

            @Override
            public R next() {
                final T first = left.hasNext() ? left.next() : null;
                final U second = right.hasNext() ? right.next() : null;
                return biFunction.apply(first, second);
            }
        };

        return StreamSupport.stream(
                Spliterators.spliterator(pairs, pairCount, java.util.Spliterator.SIZED),
                false);
    }

    /**
     * Builds the parameter sets for one tokenizer/version combination by pairing each sample in
     * {@code TextSamples.ALL} with the expected token list at the same position.
     *
     * <p>Each set holds four arguments: the tokenizer (labelled with its name), the version, the
     * sample text and the expected tokens. If the two sources differ in length, the missing side
     * is supplied as {@code null}.
     *
     * @param list expected token lists, in sample order
     * @param textTokenizer tokenizer under test
     * @param i tokenizer version to pass to the test method
     * @return sequential stream of argument sets
     */
    @Nonnull
    private static Stream<Arguments> test(List<List<String>> list, final TextTokenizer textTokenizer, final int i) {
        final Iterator<String> samples = TextSamples.ALL.iterator();
        final Iterator<List<String>> expectations = list.iterator();
        final long caseCount = Math.max(TextSamples.ALL.size(), list.size());

        final Iterator<Arguments> cases = new Iterator<Arguments>() {
            @Override
            public boolean hasNext() {
                if (samples.hasNext()) {
                    return true;
                }
                return expectations.hasNext();
            }

            @Override
            public Arguments next() {
                final Named<TextTokenizer> labelledTokenizer = Named.of(textTokenizer.getName(), textTokenizer);
                final String sample = samples.hasNext() ? samples.next() : null;
                final List<String> expected = expectations.hasNext() ? expectations.next() : null;
                return Arguments.of(labelledTokenizer, i, sample, expected);
            }
        };

        return StreamSupport.stream(
                Spliterators.spliterator(cases, caseCount, java.util.Spliterator.SIZED),
                false);
    }

    /**
     * Parameter source for {@link #compatibility(TextTokenizer, int, String, List)}: the
     * concatenation of the argument sets for every tokenizer/version combination under test.
     *
     * <p>The per-tokenizer streams are passed to {@code Stream.of} as typed varargs, not through an
     * {@code Object[]} cast, so that the flattened result is a {@code Stream<Arguments>}.
     *
     * @return all argument sets, grouped by tokenizer in declaration order
     */
    static Stream<Arguments> compatibility() {
        return Stream.of(
                test(EXPECTED_DEFAULT_SAMPLE_TOKENS, UniqueTokenLimitTextTokenizer.instance(), 0),
                test(EXPECTED_DEFAULT_SAMPLE_TOKENS, DefaultTextTokenizer.instance(), DefaultTextTokenizer.instance().getMinVersion()),
                test(EXPECTED_PREFIX_V0_SAMPLE_TOKENS, PrefixTextTokenizer.instance(), 0),
                test(EXPECTED_PREFIX_V1_SAMPLE_TOKENS, PrefixTextTokenizer.instance(), 1),
                test(EXPECTED_FILTERED_SAMPLE_TOKENS, filteredTokenizer(), DefaultTextTokenizer.instance().getMinVersion())
        ).flatMap(Function.identity());
    }

    /**
     * Checks that tokenizing a sample directly to a list yields the expected tokens.
     *
     * @param textTokenizer tokenizer under test
     * @param i tokenizer version
     * @param str sample text
     * @param list expected tokens for the sample
     */
    @MethodSource
    @ParameterizedTest
    void compatibility(TextTokenizer textTokenizer, int i, String str, List<String> list) {
        final List<String> actual = tokenList(textTokenizer, str, i);
        Assertions.assertEquals(list, actual);
    }

    /**
     * Parameter source for {@link #reconstituted(TextTokenizer, int, String, List)}: the
     * concatenation of the argument sets for every tokenizer/version combination under test.
     * It differs from the {@code compatibility()} source only in the expectation used for
     * {@code UniqueTokenLimitTextTokenizer}.
     *
     * <p>The per-tokenizer streams are passed to {@code Stream.of} as typed varargs, not through an
     * {@code Object[]} cast, so that the flattened result is a {@code Stream<Arguments>}.
     *
     * @return all argument sets, grouped by tokenizer in declaration order
     */
    static Stream<Arguments> reconstituted() {
        return Stream.of(
                test(EXPECTED_RECONSTITUTED_UNIQUE_LIMIT_SAMPLE_TOKENS, UniqueTokenLimitTextTokenizer.instance(), 0),
                test(EXPECTED_DEFAULT_SAMPLE_TOKENS, DefaultTextTokenizer.instance(), DefaultTextTokenizer.instance().getMinVersion()),
                test(EXPECTED_PREFIX_V0_SAMPLE_TOKENS, PrefixTextTokenizer.instance(), 0),
                test(EXPECTED_PREFIX_V1_SAMPLE_TOKENS, PrefixTextTokenizer.instance(), 1),
                test(EXPECTED_FILTERED_SAMPLE_TOKENS, filteredTokenizer(), DefaultTextTokenizer.instance().getMinVersion())
        ).flatMap(Function.identity());
    }

    /**
     * Checks that the token list rebuilt from the tokenizer's token-to-positions map matches the
     * expected tokens.
     *
     * @param textTokenizer tokenizer under test
     * @param i tokenizer version
     * @param str sample text
     * @param list expected tokens for the sample
     */
    @MethodSource
    @ParameterizedTest
    void reconstituted(TextTokenizer textTokenizer, int i, String str, List<String> list) {
        final List<String> actual = reconstitutedTokenList(textTokenizer, str, i);
        Assertions.assertEquals(list, actual);
    }

    /**
     * Creates a tokenizer named {@code "filter_common"} that wraps the default tokenizer factory
     * with a predicate rejecting a fixed set of common English words.
     *
     * <p>The predicate looks only at the token text; its second argument is not consulted.
     *
     * @return the tokenizer obtained from the filtering factory
     */
    @Nonnull
    private static TextTokenizer filteredTokenizer() {
        // Parameterised (rather than raw) so the element type is checked at compile time.
        final ImmutableSet<String> stopWords = ImmutableSet.of("the", "of", "in", "and", "a", "an", "some");
        return FilteringTextTokenizer.create(
                "filter_common",
                new DefaultTextTokenizerFactory(),
                (token, unused) -> !stopWords.contains(token.toString())
        ).getTokenizer();
    }
}
