package querqy.rewrite.lookup.preprocessing;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import querqy.CompoundCharSequence;
import querqy.trie.State;
import querqy.trie.TrieMap;

/* loaded from: input_file:querqy/rewrite/lookup/preprocessing/GermanNounNormalizer.class */
public class GermanNounNormalizer implements LookupPreprocessor {
    public static final int MIN_INPUT_LENGTH = 4;
    static final int MIN_INPUT_LENGTH_TO_STRIP_OFF_S = 5;
    private static final String IDENTITY = "";
    private static final TrieMap<String> MAP = loadTrieMap(false);
    private static final TrieMap<String> REVERSE_MAP = loadTrieMap(true);

    static TrieMap<String> loadTrieMap(boolean z) {
        try {
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(GermanNounNormalizer.class.getClassLoader().getResourceAsStream("de-nouns.txt"), StandardCharsets.UTF_8));
            try {
                TrieMap<String> trieMap = new TrieMap<>();
                while (true) {
                    String readLine = bufferedReader.readLine();
                    if (readLine == null) {
                        bufferedReader.close();
                        return trieMap;
                    }
                    String trim = readLine.trim();
                    int indexOf = trim.indexOf(35);
                    if (indexOf > -1) {
                        trim = trim.substring(0, indexOf).trim();
                    }
                    if (trim.length() > 0) {
                        String[] split = trim.split(",");
                        updateMap(trim.charAt(0) == ',' ? IDENTITY : split[0], trim.charAt(trim.length() - 1) == ',' ? IDENTITY : split[1], trieMap, z);
                    }
                }
            } finally {
            }
        } catch (IOException e) {
            throw new RuntimeException("Could not load noun list", e);
        }
    }

    static void updateMap(String str, String str2, TrieMap<String> trieMap, boolean z) {
        String fromMap = getFromMap(str, trieMap, z);
        if (str.length() > 0 && IDENTITY != fromMap) {
            trieMap.put(z ? new ReverseCharSequence(str) : str, IDENTITY);
        }
        String fromMap2 = getFromMap(str2, trieMap, z);
        if (str2.length() <= 0 || IDENTITY == fromMap2) {
            return;
        }
        if (str.length() > 0) {
            trieMap.put(z ? new ReverseCharSequence(str2) : str2, str);
        } else {
            trieMap.put(z ? new ReverseCharSequence(str2) : str2, IDENTITY);
        }
    }

    static String getFromMap(CharSequence charSequence, TrieMap<String> trieMap, boolean z) {
        State<String> stateForCompleteSequence = trieMap.get(z ? new ReverseCharSequence(charSequence) : charSequence).getStateForCompleteSequence();
        if (stateForCompleteSequence.isFinal()) {
            return stateForCompleteSequence.value;
        }
        return null;
    }

    @Override // querqy.rewrite.lookup.preprocessing.LookupPreprocessor
    public CharSequence process(CharSequence charSequence) {
        return !isToBeNormalized(charSequence) ? charSequence : lookupInput(charSequence).or(() -> {
            return lookupCompoundInput(charSequence);
        }).or(() -> {
            return trySZLigature(charSequence);
        }).or(() -> {
            return applyRules(charSequence);
        }).orElse(charSequence);
    }

    protected Optional<CharSequence> applyRules(CharSequence charSequence) {
        int length = charSequence.length();
        return (length < MIN_INPUT_LENGTH_TO_STRIP_OFF_S || charSequence.charAt(length - 1) != 's') ? Optional.empty() : Optional.of(charSequence.subSequence(0, length - 1));
    }

    protected Optional<CharSequence> trySZLigature(CharSequence charSequence) {
        for (CharSequence charSequence2 : getSZLigatureVariants(charSequence)) {
            Optional<CharSequence> or = lookupInput(charSequence2).or(() -> {
                return lookupCompoundInput(charSequence2);
            });
            if (or.isPresent()) {
                return or;
            }
        }
        return Optional.empty();
    }

    protected List<CharSequence> getSZLigatureVariants(CharSequence charSequence) {
        int length = charSequence.length() - 1;
        for (int i = 0; i < length; i++) {
            if (charSequence.charAt(i) == 's' && charSequence.charAt(i + 1) == 's') {
                ArrayList arrayList = new ArrayList();
                if (i == 0) {
                    if (length == 1) {
                        return Collections.singletonList("ß");
                    }
                    CharSequence subSequence = charSequence.subSequence(i + 2, charSequence.length());
                    arrayList.add(new CompoundCharSequence((CharSequence) null, "ß", subSequence));
                    if (i + 4 <= length) {
                        for (CharSequence charSequence2 : getSZLigatureVariants(subSequence)) {
                            arrayList.add(new CompoundCharSequence((CharSequence) null, "ß", charSequence2));
                            arrayList.add(new CompoundCharSequence((CharSequence) null, "ss", charSequence2));
                        }
                    }
                    return arrayList;
                }
                CharSequence subSequence2 = charSequence.subSequence(0, i);
                if (length == i + 1) {
                    return Collections.singletonList(new CompoundCharSequence((CharSequence) null, subSequence2, "ß"));
                }
                CharSequence subSequence3 = charSequence.subSequence(i + 2, charSequence.length());
                arrayList.add(new CompoundCharSequence((CharSequence) null, subSequence2, "ß", subSequence3));
                if (i + 4 <= length) {
                    for (CharSequence charSequence3 : getSZLigatureVariants(subSequence3)) {
                        arrayList.add(new CompoundCharSequence((CharSequence) null, subSequence2, "ß", charSequence3));
                        arrayList.add(new CompoundCharSequence((CharSequence) null, subSequence2, "ss", charSequence3));
                    }
                }
                return arrayList;
            }
        }
        return Collections.emptyList();
    }

    protected Optional<CharSequence> lookupCompoundInput(CharSequence charSequence) {
        ReverseCharSequence reverseCharSequence = new ReverseCharSequence(charSequence);
        int i = -1;
        State<String> state = null;
        State<String> stateForCompleteSequence = REVERSE_MAP.get(reverseCharSequence.subSequence(0, 4)).getStateForCompleteSequence();
        if (stateForCompleteSequence.isFinal()) {
            state = stateForCompleteSequence;
            i = 4;
        }
        for (int i2 = 4; stateForCompleteSequence.isKnown && i2 < charSequence.length() - 3; i2++) {
            stateForCompleteSequence = stateForCompleteSequence.node.getNext(reverseCharSequence.charAt(i2)).getStateForCompleteSequence();
            if (stateForCompleteSequence.isFinal()) {
                state = stateForCompleteSequence;
                i = i2;
            }
        }
        if (state != null) {
            return Optional.of(new CompoundCharSequence((CharSequence) null, charSequence.subSequence(0, (charSequence.length() - i) - 1), state.value == IDENTITY ? charSequence.subSequence((charSequence.length() - i) - 1, charSequence.length()) : state.value));
        }
        return Optional.empty();
    }

    protected Optional<CharSequence> lookupInput(CharSequence charSequence) {
        String fromMap = getFromMap(charSequence, MAP, false);
        return fromMap != null ? IDENTITY == fromMap ? Optional.of(charSequence) : Optional.of(fromMap) : Optional.empty();
    }

    protected boolean isToBeNormalized(CharSequence charSequence) {
        if (charSequence.length() < 4) {
            return false;
        }
        int length = charSequence.length();
        for (int i = 0; i < length; i++) {
            if (Character.isDigit(charSequence.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    public static GermanNounNormalizer create() {
        return new GermanNounNormalizer();
    }
}
