package com.apple.foundationdb.record.provider.common.text;

import com.apple.foundationdb.annotation.API;
import com.apple.foundationdb.record.provider.common.text.TextTokenizer;
import java.text.BreakIterator;
import java.text.Normalizer;
import java.util.Iterator;
import java.util.Locale;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

/**
 * A {@link TextTokenizer} that splits text on the word boundaries reported by
 * {@link BreakIterator#getWordInstance(Locale)} for {@link Locale#ROOT}.
 *
 * <p>
 * Each segment between two boundaries is normalized to Unicode form NFKD. Segments that
 * contain no letter or digit are skipped. The remaining segments are lower-cased with
 * {@link Locale#ROOT} and stripped of combining marks before being returned as tokens.
 * </p>
 *
 * <p>
 * The class is a singleton; obtain it through {@link #instance()}.
 * </p>
 */
@API(API.Status.EXPERIMENTAL)
public class DefaultTextTokenizer implements TextTokenizer {

    /** The single shared instance handed out by {@link #instance()}. */
    @Nonnull
    private static final DefaultTextTokenizer INSTANCE = new DefaultTextTokenizer();

    /** Normalization form applied to every candidate token before it is inspected. */
    @Nonnull
    private static final Normalizer.Form NORMALIZED_FORM = Normalizer.Form.NFKD;

    /** Matches runs of Unicode combining marks, which are deleted from every emitted token. */
    @Nonnull
    private static final Pattern DIACRITICAL_PATTERN = Pattern.compile("\\p{M}+");

    /** The name of this tokenizer, as returned by {@link #getName()}. */
    @Nonnull
    public static final String NAME = "default";

    /**
     * Iterator over the tokens of one string. It walks the boundaries of a
     * {@link BreakIterator} lazily and buffers at most one token ahead.
     */
    private static class BreakIteratorWrapper implements Iterator<String> {

        /** Source of boundary offsets into {@link #text}. */
        @Nonnull
        private final BreakIterator underlying;

        /** The text being tokenized. */
        @Nonnull
        private final String text;

        /** Offset of the most recently consumed boundary; the next segment starts here. */
        private int lastBreak;

        /** Token found by {@link #hasNext()} but not yet handed out, or {@code null} if none. */
        @Nullable
        private String nextToken = null;

        /** Reusable matcher; it is reset onto each token so no new matcher is allocated per token. */
        @Nonnull
        private final Matcher matcher = DIACRITICAL_PATTERN.matcher("");

        /**
         * Creates a wrapper positioned at the first boundary of the break iterator.
         *
         * @param breakIterator the break iterator that supplies boundary offsets
         * @param source the string the boundary offsets index into
         */
        private BreakIteratorWrapper(@Nonnull BreakIterator breakIterator, @Nonnull String source) {
            this.underlying = breakIterator;
            this.text = source;
            this.lastBreak = breakIterator.first();
        }

        /**
         * Advances through the remaining boundaries until a segment that qualifies as a token
         * is found or the boundaries are exhausted. Calling this repeatedly without calling
         * {@link #next()} returns the same answer and does not skip tokens.
         *
         * @return {@code true} if a token is buffered and ready to be returned by {@link #next()}
         */
        @Override
        public boolean hasNext() {
            if (nextToken != null) {
                return true;
            }
            // Re-synchronize the break iterator to the last consumed boundary; later steps use next().
            int nextBreak = underlying.following(lastBreak);
            while (nextToken == null && nextBreak != BreakIterator.DONE) {
                String candidate = text.substring(lastBreak, nextBreak);
                if (!Normalizer.isNormalized(candidate, NORMALIZED_FORM)) {
                    candidate = Normalizer.normalize(candidate, NORMALIZED_FORM);
                }
                // Segments without any letter or digit (e.g. white space or punctuation) are not tokens.
                if (containsLetterOrDigit(candidate)) {
                    nextToken = matcher.reset(candidate.toLowerCase(Locale.ROOT)).replaceAll("");
                }
                lastBreak = nextBreak;
                nextBreak = underlying.next();
            }
            return nextToken != null;
        }

        /**
         * Returns the buffered token and clears the buffer.
         *
         * @return the next token
         * @throws NoSuchElementException if the text contains no further tokens
         */
        @Override
        @Nonnull
        public String next() {
            if (!hasNext()) {
                throw new NoSuchElementException("No more tokens found in text");
            }
            String token = nextToken;
            nextToken = null;
            return token;
        }

        /**
         * Reports whether any UTF-16 code unit of the string is a letter or digit.
         *
         * <p>
         * NOTE(review): the check is deliberately per {@code char}, matching the existing
         * behavior. {@link Character#isLetterOrDigit(char)} cannot handle supplementary
         * characters, so a segment made only of such characters is not treated as a token.
         * Switching to code points would change the emitted tokens; confirm the impact on
         * existing indexes before doing so.
         * </p>
         *
         * @param candidate the normalized segment to inspect
         * @return {@code true} if at least one {@code char} is a letter or digit
         */
        private static boolean containsLetterOrDigit(@Nonnull String candidate) {
            for (int index = 0; index < candidate.length(); index++) {
                if (Character.isLetterOrDigit(candidate.charAt(index))) {
                    return true;
                }
            }
            return false;
        }
    }

    /** Private because the class is a singleton; use {@link #instance()}. */
    private DefaultTextTokenizer() {
    }

    /**
     * Returns the shared tokenizer instance.
     *
     * @return the singleton {@code DefaultTextTokenizer}
     */
    @Nonnull
    public static DefaultTextTokenizer instance() {
        return INSTANCE;
    }

    /**
     * Tokenizes the given text lazily. A fresh {@link BreakIterator} is created per call.
     * The tokenizer mode is not consulted by this implementation.
     *
     * @param str the text to tokenize
     * @param i the tokenizer version, passed to {@code validateVersion} (defined outside this class)
     * @param tokenizerMode the tokenizer mode; ignored here
     * @return an iterator over the normalized, lower-cased tokens of {@code str}
     */
    @Override
    @Nonnull
    public Iterator<String> tokenize(@Nonnull String str, int i, @Nonnull TextTokenizer.TokenizerMode tokenizerMode) {
        validateVersion(i);
        BreakIterator wordInstance = BreakIterator.getWordInstance(Locale.ROOT);
        wordInstance.setText(str);
        return new BreakIteratorWrapper(wordInstance, str);
    }

    /**
     * Returns the name of this tokenizer.
     *
     * @return {@link #NAME}
     */
    @Override
    @Nonnull
    public String getName() {
        return NAME;
    }

    /**
     * Returns the maximum supported version, which for this tokenizer equals the minimum
     * version reported by {@code getMinVersion()} (defined outside this class).
     *
     * @return the only version this tokenizer supports
     */
    @Override
    public int getMaxVersion() {
        return getMinVersion();
    }
}
