package edu.emory.mathcs.nlp.common.constituent;

import edu.emory.mathcs.nlp.common.constant.StringConst;
import edu.emory.mathcs.nlp.common.treebank.POSLibEn;
import edu.emory.mathcs.nlp.common.treebank.POSTagEn;
import edu.emory.mathcs.nlp.common.util.DSUtils;
import edu.emory.mathcs.nlp.common.util.ENUtils;
import edu.emory.mathcs.nlp.common.util.PatternUtils;
import edu.emory.mathcs.nlp.common.util.StringUtils;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/emory/mathcs/nlp/common/constituent/CTLibEn.class */
public class CTLibEn extends CTLib implements CTTagEn, POSTagEn {
    // Word-form pattern consulted by isPassiveEmptyCategory; built from the alternatives "\*" and "\*-\d+".
    // NOTE(review): how the alternatives are anchored depends on PatternUtils.createClosedORPattern — confirm there.
    public static final Pattern P_PASSIVE_NULL = PatternUtils.createClosedORPattern("\\*", "\\*-\\d+");

    // Predicates created by CTLib.matchC, one per CTTagEn.C_* constant.
    public static final Predicate<CTNode> M_NP = CTLib.matchC(CTTagEn.C_NP);
    public static final Predicate<CTNode> M_VP = CTLib.matchC(CTTagEn.C_VP);
    public static final Predicate<CTNode> M_QP = CTLib.matchC(CTTagEn.C_QP);
    public static final Predicate<CTNode> M_ADVP = CTLib.matchC(CTTagEn.C_ADVP);
    public static final Predicate<CTNode> M_SBAR = CTLib.matchC(CTTagEn.C_SBAR);
    public static final Predicate<CTNode> M_EDITED = CTLib.matchC(CTTagEn.C_EDITED);

    // Predicates created by CTLib.matchF, one per CTTagEn.F_* constant.
    public static final Predicate<CTNode> M_SBJ = CTLib.matchF(CTTagEn.F_SBJ);
    public static final Predicate<CTNode> M_NOM = CTLib.matchF(CTTagEn.F_NOM);
    public static final Predicate<CTNode> M_PRD = CTLib.matchF(CTTagEn.F_PRD);

    // Predicate created by CTLib.matchCF from a C_* constant and an F_* constant.
    public static final Predicate<CTNode> M_NP_SBJ = CTLib.matchCF(CTTagEn.C_NP, CTTagEn.F_SBJ);

    // Predicates created by CTLib.matchCp.
    // NOTE(review): presumably prefix matches on the constituent tag (hence the "x" suffix) — confirm in CTLib.
    public static final Predicate<CTNode> M_NNx = CTLib.matchCp(POSTagEn.POS_NN);
    public static final Predicate<CTNode> M_VBx = CTLib.matchCp(POSTagEn.POS_VB);
    public static final Predicate<CTNode> M_WHx = CTLib.matchCp("WH");
    public static final Predicate<CTNode> M_Sx = CTLib.matchCp("S");
    public static final Predicate<CTNode> M_SBARx = CTLib.matchCp(CTTagEn.C_SBAR);

    // Predicates created by CTLib.matchCo from a set of tags.
    // NOTE(review): presumably "constituent tag is any member of the set" — confirm in CTLib.
    public static final Predicate<CTNode> M_S_SBAR = CTLib.matchCo((Set<String>) DSUtils.toHashSet("S", CTTagEn.C_SBAR));
    public static final Predicate<CTNode> M_NP_NML = CTLib.matchCo((Set<String>) DSUtils.toHashSet(CTTagEn.C_NP, CTTagEn.C_NML));
    public static final Predicate<CTNode> M_VBD_VBN = CTLib.matchCo((Set<String>) DSUtils.toHashSet(POSTagEn.POS_VBD, POSTagEn.POS_VBN));
    public static final Predicate<CTNode> M_VP_RRC_UCP = CTLib.matchCo((Set<String>) DSUtils.toHashSet(CTTagEn.C_VP, CTTagEn.C_RRC, CTTagEn.C_UCP));

    // Tag sets passed to CTNode.isConstituentTagAny by the helpers below.
    // Parent tags accepted by fixLGS.
    private static final Set<String> S_LGS_PHRASE = DSUtils.toHashSet(CTTagEn.C_PP, CTTagEn.C_SBAR);
    // Tags tested by isMainClause.
    private static final Set<String> S_MAIN_CLAUSE = DSUtils.toHashSet("S", CTTagEn.C_SQ, CTTagEn.C_SINV);
    // Parent tags for which fixSBJ leaves the SBJ tag where it is.
    private static final Set<String> S_EDITED_PHRASE = DSUtils.toHashSet(CTTagEn.C_EDITED, CTTagEn.C_EMBED);
    // Tags tested by isNominalPhrase.
    private static final Set<String> S_NOMINAL_PHRASE = DSUtils.toHashSet(CTTagEn.C_NP, CTTagEn.C_NML, CTTagEn.C_NX, CTTagEn.C_NAC);
    // Tags tested by isWhPhraseLink.
    private static final Set<String> S_WH_LINK = DSUtils.toHashSet(CTTagEn.C_WHNP, CTTagEn.C_WHPP, CTTagEn.C_WHADVP);
    // Tags tested by isSeparator.
    private static final Set<String> S_SEPARATOR = DSUtils.toHashSet(",", ":");
    // Tags tested by isConjunction.
    private static final Set<String> S_CONJUNCTION = DSUtils.toHashSet(POSTagEn.POS_CC, CTTagEn.C_CONJP);

    /** Private constructor: every member of this class is static, so it is not meant to be instantiated. */
    private CTLibEn() {
    }

    /**
     * Runs the three tree-normalisation passes of this class on {@code cTTree}, in this order:
     * {@link #fixFunctionTags(CTTree)}, {@link #linkReducedPassiveNulls(CTTree)} and
     * {@link #linkRelativizers(CTTree)}. The tree is modified in place.
     *
     * @param cTTree the tree to preprocess
     */
    public static void preprocess(CTTree cTTree) {
        fixFunctionTags(cTTree);
        linkReducedPassiveNulls(cTTree);
        linkRelativizers(cTTree);
    }

    /**
     * Applies the SBJ / LGS / CLF function-tag corrections to every node of the tree,
     * starting from its root.
     *
     * @param cTTree the tree whose function tags are corrected in place
     */
    public static void fixFunctionTags(CTTree cTTree) {
        CTNode root = cTTree.getRoot();
        fixFunctionTagsAux(root);
    }

    /**
     * Pre-order walk that tries the three function-tag fixes on {@code cTNode} and then
     * descends into its children.
     *
     * @param cTNode the node currently being visited
     */
    private static void fixFunctionTagsAux(CTNode cTNode) {
        // The fixes are tried in a fixed order and the first one that succeeds ends the
        // attempt: fixLGS runs only if fixSBJ did nothing, fixCLF only if both did nothing.
        if (!fixSBJ(cTNode) && !fixLGS(cTNode)) {
            fixCLF(cTNode);
        }
        for (CTNode child : cTNode.getChildrenList()) {
            fixFunctionTagsAux(child);
        }
    }

    /**
     * Moves an SBJ function tag from {@code cTNode} to its parent when the parent has
     * exactly one child, is not tagged with one of {@code S_EDITED_PHRASE}, and carries no
     * function tag of its own. The parent also takes over the child's constituent tag.
     *
     * @param cTNode the node to inspect
     * @return {@code true} if the tag was moved; {@code false} if nothing was changed
     */
    private static boolean fixSBJ(CTNode cTNode) {
        if (!cTNode.hasFunctionTag(CTTagEn.F_SBJ)) {
            return false;
        }
        CTNode parent = cTNode.getParent();
        // The traversal starts at the tree root, and other methods in this class treat a
        // missing parent as possible; without this guard an SBJ-tagged parentless node
        // would raise a NullPointerException.
        if (parent == null) {
            return false;
        }
        if (parent.getChildrenSize() != 1 || parent.isConstituentTagAny(S_EDITED_PHRASE) || !parent.hasNoFunctionTag()) {
            return false;
        }
        cTNode.removeFunctionTag(CTTagEn.F_SBJ);
        parent.addFunctionTag(CTTagEn.F_SBJ);
        parent.setConstituentTag(cTNode.getConstituentTag());
        return true;
    }

    /**
     * Moves an LGS function tag from {@code cTNode} to its parent when the node itself is
     * not a PP and the parent's constituent tag is one of {@code S_LGS_PHRASE}.
     *
     * @param cTNode the node to inspect
     * @return {@code true} if the tag was moved; {@code false} if nothing was changed
     */
    private static boolean fixLGS(CTNode cTNode) {
        if (!cTNode.hasFunctionTag(CTTagEn.F_LGS) || cTNode.isConstituentTag(CTTagEn.C_PP)) {
            return false;
        }
        CTNode parent = cTNode.getParent();
        // The traversal starts at the tree root, and other methods in this class treat a
        // missing parent as possible; guard instead of raising a NullPointerException.
        if (parent == null || !parent.isConstituentTagAny(S_LGS_PHRASE)) {
            return false;
        }
        cTNode.removeFunctionTag(CTTagEn.F_LGS);
        parent.addFunctionTag(CTTagEn.F_LGS);
        return true;
    }

    /**
     * For a main-clause node carrying the CLF function tag: removes the tag from the node and,
     * if {@code getFirstDescendant(M_SBARx)} finds a descendant, adds the tag to that descendant.
     * The tag is removed from {@code cTNode} even when no such descendant exists.
     *
     * @param cTNode the node to inspect
     * @return {@code true} only if the tag was re-attached to a descendant
     */
    private static boolean fixCLF(CTNode cTNode) {
        if (cTNode.hasFunctionTag(CTTagEn.F_CLF) && isMainClause(cTNode)) {
            // Look up the target before stripping the tag, as the original does.
            CTNode target = cTNode.getFirstDescendant(M_SBARx);
            cTNode.removeFunctionTag(CTTagEn.F_CLF);
            if (target != null) {
                target.addFunctionTag(CTTagEn.F_CLF);
                return true;
            }
        }
        return false;
    }

    /**
     * Assigns antecedents to the passive null elements of the tree, visiting every node
     * from the root downwards.
     *
     * @param cTTree the tree to process in place
     */
    public static void linkReducedPassiveNulls(CTTree cTTree) {
        CTNode root = cTTree.getRoot();
        linkReducedPassiveNullsAux(cTTree, root);
    }

    /**
     * Pre-order walk: links {@code cTNode} to an antecedent when it is a passive empty
     * category whose word form is {@code E_NULL}, then recurses into its children.
     *
     * @param cTTree the tree that owns {@code cTNode}; consulted for empty-category lists
     * @param cTNode the node currently being visited
     */
    private static void linkReducedPassiveNullsAux(CTTree cTTree, CTNode cTNode) {
        if (isPassiveEmptyCategory(cTNode) && cTNode.isWordForm(CTTagEn.E_NULL)) {
            linkReducedPassiveNull(cTTree, cTNode);
        }
        for (CTNode child : cTNode.getChildrenList()) {
            linkReducedPassiveNullsAux(cTTree, child);
        }
    }

    /**
     * Picks an antecedent for one passive null element among the siblings of its highest
     * chained VP/RRC/UCP ancestor. Does nothing when that ancestor, or the ancestor's
     * parent, does not exist.
     *
     * @param cTTree    the tree that owns {@code nullNode}
     * @param nullNode  a node for which {@link #isPassiveEmptyCategory(CTNode)} returned
     *                  {@code true}, so its parent and grandparent are present
     */
    private static void linkReducedPassiveNull(CTTree cTTree, CTNode nullNode) {
        CTNode anchor = nullNode.getParent();
        int emptyCategoryIndex = anchor.getParent().getEmptyCategoryIndex();
        if (emptyCategoryIndex != -1) {
            List<CTNode> emptyCategoryList = cTTree.getEmptyCategoryList(emptyCategoryIndex);
            // An empty list is treated like a missing one rather than failing on get(0).
            if (emptyCategoryList != null && !emptyCategoryList.isEmpty()) {
                anchor = emptyCategoryList.get(0);
            }
        }

        // linkComlementizersAux null-checks the result of getHighestChainedAncestor, so it is
        // treated as nullable here as well; the ancestor's parent may likewise be absent.
        CTNode ancestor = anchor.getHighestChainedAncestor(M_VP_RRC_UCP);
        CTNode ancestorParent = (ancestor == null) ? null : ancestor.getParent();
        if (ancestorParent == null) {
            return;
        }

        if (ancestorParent.matches(M_NP_NML) || ancestorParent.hasFunctionTag(CTTagEn.F_NOM)) {
            // Candidates are tried in this order; each later one is tried only while the
            // node still has no antecedent.
            nullNode.setAntecedent(ancestor.getLeftNearestSibling(M_NP_NML));
            if (!nullNode.hasAntecedent()) {
                nullNode.setAntecedent(ancestor.getLeftNearestSibling(M_NNx));
            }
            if (!nullNode.hasAntecedent()) {
                nullNode.setAntecedent(ancestor.getLeftNearestSibling(M_QP));
            }
            if (!nullNode.hasAntecedent()) {
                nullNode.setAntecedent(ancestor.getLeftNearestSibling(M_NOM));
            }
        } else if (isClause(ancestorParent)) {
            // Inside a clause: prefer a subject NP to the left, otherwise one to the right.
            nullNode.setAntecedent(ancestor.getLeftNearestSibling(M_NP_SBJ));
            if (!nullNode.hasAntecedent()) {
                nullNode.setAntecedent(ancestor.getRightNearestSibling(M_NP_SBJ));
            }
        }
    }

    /**
     * Tests whether {@code cTNode} is an empty category whose word form matches
     * {@link #P_PASSIVE_NULL} and which sits under an NP without function tags, where that
     * NP has a VP parent and a left sibling matching {@link #M_VBD_VBN}.
     *
     * @param cTNode the node to test
     * @return {@code true} if all of the above conditions hold
     */
    public static boolean isPassiveEmptyCategory(CTNode cTNode) {
        boolean candidate = cTNode.isEmptyCategory()
                && cTNode.matchesWordForm(P_PASSIVE_NULL)
                && cTNode.hasParent();
        if (!candidate) {
            return false;
        }

        CTNode np = cTNode.getParent();
        if (!np.isConstituentTag(CTTagEn.C_NP) || !np.hasNoFunctionTag()) {
            return false;
        }
        if (!np.hasParent() || !np.getParent().isConstituentTag(CTTagEn.C_VP)) {
            return false;
        }
        if (!np.hasLeftSibling()) {
            return false;
        }
        return np.getLeftSibling().matches(M_VBD_VBN);
    }

    /**
     * Assigns antecedents to the relativizers found in the tree, searching from the root.
     *
     * @param cTTree the tree to process in place
     */
    public static void linkRelativizers(CTTree cTTree) {
        CTNode root = cTTree.getRoot();
        linkComlementizersAux(cTTree, root);
    }

    /**
     * Recursive worker behind {@link #linkRelativizers(CTTree)}.
     *
     * <p>Nodes that are not WHNP/WHPP/WHADVP are only traversed. For a wh-phrase node the
     * method looks up its relativizer and the highest chained SBAR ancestor, possibly
     * re-anchors that SBAR, and then picks an antecedent among the SBAR's left siblings
     * according to the tag of the SBAR's parent (NP, ADVP or VP). The children of a
     * wh-phrase node are not visited.
     *
     * @param cTTree the tree that owns {@code cTNode}; consulted for empty-category lists
     * @param cTNode the node currently being visited
     */
    private static void linkComlementizersAux(CTTree cTTree, CTNode cTNode) {
        if (!isWhPhraseLink(cTNode)) {
            for (CTNode child : cTNode.getChildrenList()) {
                linkComlementizersAux(cTTree, child);
            }
            return;
        }

        CTNode relativizer = getRelativizer(cTNode);
        CTNode sbar = cTNode.getHighestChainedAncestor(M_SBAR);
        boolean linkable = relativizer != null
                && sbar != null
                && !sbar.hasFunctionTag(CTTagEn.F_NOM)
                && ENUtils.isLinkingRelativizer(relativizer.getWordForm());
        if (!linkable) {
            return;
        }

        int emptyCategoryIndex = sbar.getEmptyCategoryIndex();
        if (emptyCategoryIndex != -1) {
            // Re-anchor on the SBAR parent of the first *ICH* element in the co-indexed list.
            List<CTNode> coindexed = cTTree.getEmptyCategoryList(emptyCategoryIndex);
            if (coindexed != null) {
                for (CTNode candidate : coindexed) {
                    if (candidate.getWordForm().startsWith(CTTagEn.E_ICH)
                            && candidate.getParent().isConstituentTag(CTTagEn.C_SBAR)) {
                        sbar = candidate.getParent();
                        break;
                    }
                }
            }
        } else if (sbar.hasParent() && sbar.getParent().isConstituentTag(CTTagEn.C_UCP)) {
            // An SBAR directly under a UCP is represented by that UCP.
            sbar = sbar.getParent();
        }

        CTNode sbarParent = sbar.getParent();
        if (sbarParent == null) {
            return;
        }

        if (sbarParent.isConstituentTag(CTTagEn.C_NP)) {
            CTNode np = sbar.getLeftNearestSibling(M_NP);
            if (np != null) {
                relativizer.setAntecedent(np);
            }
        } else if (sbarParent.isConstituentTag(CTTagEn.C_ADVP)) {
            CTNode advp = sbar.getLeftNearestSibling(M_ADVP);
            if (advp != null) {
                relativizer.setAntecedent(advp);
            }
        } else if (sbarParent.isConstituentTag(CTTagEn.C_VP)) {
            CTNode predicate = sbar.getLeftNearestSibling(M_PRD);
            if (predicate != null
                    && (sbar.hasFunctionTag(CTTagEn.F_CLF) || isWhCategoryCompatible(cTNode, predicate))) {
                relativizer.setAntecedent(predicate);
            }
        }

        // Follow antecedent links through empty-category terminals until a non-empty node
        // (or no node at all) is reached, and store that as the relativizer's antecedent.
        CTNode resolved = relativizer.getAntecedent();
        while (resolved != null && resolved.isEmptyCategoryTerminal()) {
            resolved = resolved.getFirstTerminal().getAntecedent();
        }
        relativizer.setAntecedent(resolved);
    }

    /**
     * Tests whether a wh-phrase and a predicate sibling pair up by category:
     * WHNP with NP, WHPP with PP, or WHADVP with ADVP.
     *
     * @param whPhrase  the wh-phrase node
     * @param predicate the left sibling carrying the PRD function tag
     * @return {@code true} if the two constituent tags form one of the pairs above
     */
    private static boolean isWhCategoryCompatible(CTNode whPhrase, CTNode predicate) {
        if (whPhrase.isConstituentTag(CTTagEn.C_WHNP) && predicate.isConstituentTag(CTTagEn.C_NP)) {
            return true;
        }
        if (whPhrase.isConstituentTag(CTTagEn.C_WHPP) && predicate.isConstituentTag(CTTagEn.C_PP)) {
            return true;
        }
        return whPhrase.isConstituentTag(CTTagEn.C_WHADVP) && predicate.isConstituentTag(CTTagEn.C_ADVP);
    }

    /**
     * Finds the relativizer terminal inside a wh-phrase.
     *
     * <p>If the phrase is an empty-category terminal, its first terminal is returned.
     * Otherwise the first terminal whose tag satisfies {@code POSLibEn.isRelativizer} wins;
     * if there is none, the first terminal whose word form satisfies
     * {@code ENUtils.isRelativizer} is used.
     *
     * @param cTNode the candidate wh-phrase
     * @return the relativizer terminal, or {@code null} if {@code cTNode} is not a wh-phrase
     *         or contains no relativizer
     */
    public static CTNode getRelativizer(CTNode cTNode) {
        if (isWhPhrase(cTNode)) {
            List<CTNode> terminals = cTNode.getTerminalList();
            if (cTNode.isEmptyCategoryTerminal()) {
                return terminals.get(0);
            }

            // First pass: decide by tag. All terminals are checked before falling back.
            for (CTNode terminal : terminals) {
                String tag = terminal.getConstituentTag();
                if (POSLibEn.isRelativizer(tag)) {
                    return terminal;
                }
            }

            // Second pass: decide by word form.
            for (CTNode terminal : terminals) {
                String form = terminal.getWordForm();
                if (ENUtils.isRelativizer(form)) {
                    return terminal;
                }
            }
        }
        return null;
    }

    /**
     * Returns {@code cTNode}, or the first node below it along a chain of only-children,
     * that satisfies {@link #M_WHx}.
     *
     * @param cTNode the node to start from
     * @return the matching node, or {@code null} if none is found
     */
    public static CTNode getWhPhrase(CTNode cTNode) {
        final boolean followOnlyChildren = true;
        return getNode(cTNode, M_WHx, followOnlyChildren);
    }

    /**
     * Tests {@code cTNode} for coordination using its own children list.
     *
     * @param cTNode the node to test
     * @return the result of {@link #containsCoordination(CTNode, List)} for the node's children
     */
    public static boolean containsCoordination(CTNode cTNode) {
        List<CTNode> children = cTNode.getChildrenList();
        return containsCoordination(cTNode, children);
    }

    /**
     * Tests whether {@code cTNode} contains coordination, judged over the given nodes.
     *
     * <p>The answer is {@code true} when the node is a UCP, when it matches
     * {@link #M_NP_NML} and {@code list} ends in an "etc." element, or when any node in
     * {@code list} is a conjunction.
     *
     * @param cTNode the node to test
     * @param list   the nodes to inspect
     * @return {@code true} if one of the conditions above holds
     */
    public static boolean containsCoordination(CTNode cTNode, List<CTNode> list) {
        boolean coordinatedByShape = cTNode.isConstituentTag(CTTagEn.C_UCP)
                || (cTNode.matches(M_NP_NML) && containsEtc(list));
        if (coordinatedByShape) {
            return true;
        }
        for (CTNode child : list) {
            if (isConjunction(child)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Scans {@code list} from its end towards the front, skips nodes whose tag is
     * punctuation, and reports whether the first remaining node is an "etc." element.
     * The element at index 0 is never inspected.
     *
     * @param list the nodes to scan
     * @return the {@link #isEtc(CTNode)} result for the last non-punctuation node at
     *         index 1 or higher; {@code false} if there is no such node
     */
    private static boolean containsEtc(List<CTNode> list) {
        int index = list.size() - 1;
        while (index > 0) {
            CTNode candidate = list.get(index);
            boolean punctuation = POSLibEn.isPunctuation(candidate.getConstituentTag());
            if (!punctuation) {
                return isEtc(candidate);
            }
            index--;
        }
        return false;
    }

    /**
     * Tests whether {@code cTNode} is an "etc." element: either it carries the ETC function
     * tag, or the word form of its first terminal equals {@code "etc."} ignoring case.
     *
     * @param cTNode the node to test
     * @return {@code true} if either condition holds
     */
    public static boolean isEtc(CTNode cTNode) {
        if (cTNode.hasFunctionTag(CTTagEn.F_ETC)) {
            return true;
        }
        CTNode firstTerminal = cTNode.getFirstTerminal();
        return firstTerminal.isWordFormIgnoreCase("etc.");
    }

    /**
     * Tests whether {@code cTNode} is a conjunction or a separator.
     *
     * @param cTNode the node to test
     * @return {@code true} if {@link #isConjunction(CTNode)} or {@link #isSeparator(CTNode)} holds
     */
    public static boolean isCoordinator(CTNode cTNode) {
        if (isConjunction(cTNode)) {
            return true;
        }
        return isSeparator(cTNode);
    }

    /**
     * Tests {@code cTNode} against the conjunction tag set ({@code POS_CC}, {@code C_CONJP})
     * via {@code isConstituentTagAny}.
     *
     * @param cTNode the node to test
     * @return the result of {@code cTNode.isConstituentTagAny(S_CONJUNCTION)}
     */
    public static boolean isConjunction(CTNode cTNode) {
        return cTNode.isConstituentTagAny(S_CONJUNCTION);
    }

    /**
     * Tests {@code cTNode} against the separator tag set ({@code ","} and {@code ":"})
     * via {@code isConstituentTagAny}.
     *
     * @param cTNode the node to test
     * @return the result of {@code cTNode.isConstituentTagAny(S_SEPARATOR)}
     */
    public static boolean isSeparator(CTNode cTNode) {
        return cTNode.isConstituentTagAny(S_SEPARATOR);
    }

    /**
     * Tests whether {@code cTNode} is a correlative conjunction.
     *
     * <p>A CC node is judged by {@code ENUtils.isCorrelativeConjunction} on its word form.
     * A CONJP node qualifies when its space-joined word forms, lower-cased, equal
     * {@code "not only"}. Any other node does not qualify.
     *
     * @param cTNode the node to test
     * @return {@code true} if the node is a correlative conjunction as described above
     */
    public static boolean isCorrelativeConjunction(CTNode cTNode) {
        if (cTNode.isConstituentTag(POSTagEn.POS_CC)) {
            String form = cTNode.getWordForm();
            return ENUtils.isCorrelativeConjunction(form);
        }
        if (!cTNode.isConstituentTag(CTTagEn.C_CONJP)) {
            return false;
        }
        String phrase = cTNode.toWordForms(false, StringConst.SPACE);
        String lowered = StringUtils.toLowerCase(phrase);
        return lowered.equals("not only");
    }

    /**
     * Tests whether {@code cTNode} is a main clause or a subordinate clause.
     *
     * @param cTNode the node to test
     * @return {@code true} if {@link #isMainClause(CTNode)} or
     *         {@link #isSubordinateClause(CTNode)} holds
     */
    public static boolean isClause(CTNode cTNode) {
        if (isMainClause(cTNode)) {
            return true;
        }
        return isSubordinateClause(cTNode);
    }

    /**
     * Tests {@code cTNode} against the main-clause tag set ({@code "S"}, {@code C_SQ},
     * {@code C_SINV}) via {@code isConstituentTagAny}.
     *
     * @param cTNode the node to test
     * @return the result of {@code cTNode.isConstituentTagAny(S_MAIN_CLAUSE)}
     */
    public static boolean isMainClause(CTNode cTNode) {
        return cTNode.isConstituentTagAny(S_MAIN_CLAUSE);
    }

    /**
     * Tests whether the constituent tag of {@code cTNode} starts with {@code C_SBAR}.
     *
     * @param cTNode the node to test
     * @return {@code true} if the tag has that prefix
     */
    public static boolean isSubordinateClause(CTNode cTNode) {
        String tag = cTNode.getConstituentTag();
        return tag.startsWith(CTTagEn.C_SBAR);
    }

    /**
     * Tests {@code cTNode} against the nominal-phrase tag set ({@code C_NP}, {@code C_NML},
     * {@code C_NX}, {@code C_NAC}) via {@code isConstituentTagAny}.
     *
     * @param cTNode the node to test
     * @return the result of {@code cTNode.isConstituentTagAny(S_NOMINAL_PHRASE)}
     */
    public static boolean isNominalPhrase(CTNode cTNode) {
        return cTNode.isConstituentTagAny(S_NOMINAL_PHRASE);
    }

    /**
     * Tests {@code cTNode} against the wh-link tag set ({@code C_WHNP}, {@code C_WHPP},
     * {@code C_WHADVP}) via {@code isConstituentTagAny}. Relativizer linking only acts on
     * nodes for which this returns {@code true}.
     *
     * @param cTNode the node to test
     * @return the result of {@code cTNode.isConstituentTagAny(S_WH_LINK)}
     */
    public static boolean isWhPhraseLink(CTNode cTNode) {
        return cTNode.isConstituentTagAny(S_WH_LINK);
    }

    /**
     * Applies the {@link #M_WHx} predicate (built with {@code CTLib.matchCp("WH")}) to
     * {@code cTNode}.
     *
     * @param cTNode the node to test
     * @return the predicate's result
     */
    public static boolean isWhPhrase(CTNode cTNode) {
        return M_WHx.test(cTNode);
    }

    /**
     * Tests whether {@code cTNode}, or a node below it along a chain of only-children,
     * satisfies {@link #M_EDITED}.
     *
     * @param cTNode the node to test
     * @return {@code true} if such a node exists
     */
    public static boolean isEditedPhrase(CTNode cTNode) {
        CTNode edited = getNode(cTNode, M_EDITED, true);
        return edited != null;
    }

    /**
     * Tests whether the word form of {@code cTNode} starts with {@code E_ICH},
     * {@code E_PPA}, or {@code E_RNR}.
     *
     * @param cTNode the node to test; its word form is dereferenced without a null check
     * @return {@code true} if the word form has one of those prefixes
     */
    public static boolean isDiscontinuousConstituent(CTNode cTNode) {
        String form = cTNode.getWordForm();
        if (form.startsWith(CTTagEn.E_ICH)) {
            return true;
        }
        if (form.startsWith(CTTagEn.E_PPA)) {
            return true;
        }
        return isRNR(cTNode);
    }

    /**
     * Tests whether the word form of {@code cTNode} starts with {@code E_RNR}.
     *
     * @param cTNode the node to test; its word form is dereferenced without a null check
     * @return {@code true} if the word form has that prefix
     */
    public static boolean isRNR(CTNode cTNode) {
        return cTNode.getWordForm().startsWith(CTTagEn.E_RNR);
    }

    /**
     * Returns the first node that satisfies {@code predicate}, starting at {@code cTNode}.
     *
     * <p>When {@code z} is {@code true} the search continues downwards for as long as the
     * current node has exactly one child; when it is {@code false} only {@code cTNode}
     * itself is tested.
     *
     * @param cTNode    the node to start from
     * @param predicate the condition a node has to satisfy
     * @param z         whether to descend through chains of only-children
     * @return the first matching node, or {@code null} if none is found
     */
    public static CTNode getNode(CTNode cTNode, Predicate<CTNode> predicate, boolean z) {
        CTNode current = cTNode;
        while (!predicate.test(current)) {
            // Stop unless descent is enabled and there is exactly one child to follow.
            if (!z || current.getChildrenSize() != 1) {
                return null;
            }
            current = current.getFirstChild();
        }
        return current;
    }
}
