package com.apple.foundationdb.record.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:com/apple/foundationdb/record/lucene/AlphanumericCjkAnalyzerTest.class */
/**
 * Tests for {@link AlphanumericCjkAnalyzer}.
 *
 * <p>Each test feeds a string mixing Latin-script words, CJK characters, punctuation and
 * whitespace through an analyzer instance and compares the emitted terms, in order, against
 * an expected list.
 */
class AlphanumericCjkAnalyzerTest {

    @Test
    void verifyTextWithMixedCharactersIsTokenizedAsExpected() throws IOException {
        // NOTE(review): 3 and 30 are presumably the min/max token lengths - confirm against the constructor.
        final Analyzer analyzer =
                new AlphanumericCjkAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, 3, 30, (String) null);
        final List<String> expected = List.of(
                "water", "水", "水", "물", "的", "の", "의", "house", "屋", "家", "집", "you", "你", "君", "너");

        final List<String> actual = readTokenizedText(analyzer, "water水水물-of的の의。house屋家집\nyou你君너");

        Assertions.assertIterableEquals(expected, actual, "Incorrect tokenized string!");
    }

    @Test
    void verifyAlphanumericMinTokenLengthIsRespected() throws IOException {
        final Analyzer analyzer = new AlphanumericCjkAnalyzer(CharArraySet.EMPTY_SET, 4, 6, (String) null);
        // The three-letter words "all" and "can" are expected to be dropped; single CJK characters are kept.
        final List<String> expected = List.of("当", "when", "全", "行", "每", "each", "醒", "wake");

        final List<String> actual = readTokenizedText(analyzer, "当when全all 行 can 每each。醒-wake");

        Assertions.assertIterableEquals(expected, actual, "Incorrect tokenized string!");
    }

    @Test
    void verifyAlphanumericMaxTokenLengthIsRespected() throws IOException {
        final Analyzer analyzer = new AlphanumericCjkAnalyzer(CharArraySet.EMPTY_SET, 1, 3, false, (String) null);
        // The four-letter words "when", "each" and "wake" are expected to be dropped entirely.
        final List<String> expected = List.of("当", "全", "all", "行", "can", "每", "醒", "我", "i", "之", "of");

        final List<String> actual = readTokenizedText(analyzer, "当when全all 行 can 每each。醒-wake，我I\n之of");

        Assertions.assertIterableEquals(expected, actual, "Incorrect tokenized string!");
    }

    @Test
    void breaksLongTokensApart() throws IOException {
        // Same input as the max-length test, but with the boolean flag set to true: over-long words
        // are expected to be split into pieces (e.g. "when" -> "whe", "n") instead of being dropped.
        final Analyzer analyzer = new AlphanumericCjkAnalyzer(CharArraySet.EMPTY_SET, 1, 3, true, (String) null);
        final List<String> expected = List.of(
                "当", "whe", "n", "全", "all", "行", "can", "每", "eac", "h", "醒", "wak", "e", "我", "i", "之", "of");

        final List<String> actual = readTokenizedText(analyzer, "当when全all 行 can 每each。醒-wake，我I\n之of");

        Assertions.assertIterableEquals(expected, actual, "Incorrect tokenized string!");
    }

    @Test
    void verifyStopwordsAreExcluded() throws IOException {
        final Analyzer analyzer = new AlphanumericCjkAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
        // "of" appears in the input but is expected to be filtered out.
        final List<String> expected = List.of("当", "全", "all", "行", "can", "每", "each", "醒", "wake");

        final List<String> actual = readTokenizedText(analyzer, "当of全all 行 can 每each。醒-wake");

        Assertions.assertIterableEquals(expected, actual, "Incorrect tokenized String!");
    }

    /**
     * Runs {@code str} through {@code analyzer} and collects every emitted term, in order.
     *
     * <p>The token stream is consumed following the Lucene consumer workflow
     * ({@code reset()}, {@code incrementToken()} until exhausted, {@code end()}, {@code close()}).
     * This helper takes ownership of {@code analyzer} and closes it once the stream has been
     * consumed, so callers must not reuse the analyzer afterwards.
     *
     * @param analyzer the analyzer under test; closed by this method
     * @param str the text to tokenize
     * @return the terms produced by the analyzer, in emission order
     * @throws IOException if the token stream fails while being read
     */
    private List<String> readTokenizedText(Analyzer analyzer, String str) throws IOException {
        final List<String> tokens = new ArrayList<>();
        // Resources are closed in reverse order: the token stream first, then the analyzer.
        try (Analyzer ownedAnalyzer = analyzer;
                TokenStream tokenStream = ownedAnalyzer.tokenStream("text", str)) {
            final CharTermAttribute termAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                tokens.add(termAttribute.toString());
            }
            // Signals end-of-stream so filters can perform their final-state bookkeeping.
            tokenStream.end();
        }
        return tokens;
    }
}
