package org.opensextant.extractors.geo.rules;

import java.io.IOException;
import java.net.URL;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.opensextant.ConfigException;
import org.opensextant.data.Place;
import org.opensextant.data.TextInput;
import org.opensextant.extractors.geo.PlaceCandidate;
import org.opensextant.extractors.xtax.TaxonMatch;
import org.opensextant.util.FileUtility;

/* loaded from: input_file:org/opensextant/extractors/geo/rules/PersonNameFilter.class */
public class PersonNameFilter extends GeocodeRule {
    private Set<String> nameFilter;
    private Set<String> titles;
    private Set<String> suffixes;
    private static final int AVG_WORD = 7;
    private static final int LONG_NAME_LEN = 21;
    private Map<String, String> resolvedPersons = new HashMap();
    private Map<String, String> resolvedOrgs = new HashMap();
    private static final Pattern delPeriod = Pattern.compile("\\.+$");
    public static final String NAME_IN_ORG_RULE = "NameInOrg";

    public PersonNameFilter(URL url, URL url2, URL url3) throws ConfigException {
        this.nameFilter = null;
        this.titles = null;
        this.suffixes = null;
        try {
            this.nameFilter = FileUtility.loadDictionary(url, false);
            this.titles = FileUtility.loadDictionary(url2, false);
            this.suffixes = FileUtility.loadDictionary(url3, false);
            debug();
        } catch (IOException e) {
            throw new ConfigException("Default filter not found", e);
        }
    }

    private void debug() {
        if (this.log.isDebugEnabled()) {
            this.log.debug("NAME FILTER\n\t{}", this.nameFilter);
            this.log.debug("TITLE FILTER\n\t{}", this.titles);
            this.log.debug("SUFFIX FILTER\n\t{}", this.suffixes);
        }
    }

    public PersonNameFilter(String str, String str2, String str3) throws ConfigException {
        this.nameFilter = null;
        this.titles = null;
        this.suffixes = null;
        try {
            this.nameFilter = FileUtility.loadDictionary(str, false);
            this.titles = FileUtility.loadDictionary(str2, false);
            this.suffixes = FileUtility.loadDictionary(str3, false);
            debug();
        } catch (IOException e) {
            throw new ConfigException("Default filter not found", e);
        }
    }

    private static final String withoutPeriod(String str) {
        return str.endsWith(".") ? delPeriod.matcher(str).replaceAll("") : str;
    }

    @Override // org.opensextant.extractors.geo.rules.GeocodeRule
    public void reset() {
        this.resolvedPersons.clear();
        this.resolvedOrgs.clear();
    }

    public Map<String, String> getPersonNames() {
        return this.resolvedPersons;
    }

    public Map<String, String> getOrgNames() {
        return this.resolvedOrgs;
    }

    private static final boolean hasNonWhitespace(String str, int i, int i2) {
        for (int i3 = i; i3 < i2; i3++) {
            if (!Character.isWhitespace(str.charAt(i3))) {
                return true;
            }
        }
        return false;
    }

    public void evaluateNamedEntities(TextInput textInput, List<PlaceCandidate> list, List<TaxonMatch> list2, List<TaxonMatch> list3) {
        for (PlaceCandidate placeCandidate : list) {
            if (!placeCandidate.isFilteredOut() && !placeCandidate.isCountry) {
                if (this.resolvedPersons.containsKey(placeCandidate.getTextnorm())) {
                    placeCandidate.setFilteredOut(true);
                    placeCandidate.addRule("ResolvedPerson");
                } else if (this.resolvedOrgs.containsKey(placeCandidate.getTextnorm())) {
                    placeCandidate.setFilteredOut(true);
                    placeCandidate.addRule("ResolvedOrg");
                } else {
                    Iterator<TaxonMatch> it = list2.iterator();
                    while (true) {
                        if (!it.hasNext()) {
                            break;
                        }
                        TaxonMatch next = it.next();
                        String str = null;
                        if (placeCandidate.isWithin(next)) {
                            str = "ResolvedPerson";
                        } else if (placeCandidate.isBefore(next) && placeCandidate.getWordCount() == 1) {
                            if (hasNonWhitespace(textInput.buffer, placeCandidate.end, next.start)) {
                                continue;
                            } else {
                                str = "ResolvedPerson.PreceedingName";
                            }
                        } else if (placeCandidate.isAfter(next)) {
                            if (hasNonWhitespace(textInput.buffer, next.end, placeCandidate.start)) {
                                continue;
                            } else {
                                str = "ResolvedPerson.SucceedingName";
                            }
                        } else if (next.isWithin(placeCandidate)) {
                            placeCandidate.addRule("Contains.PersonName");
                            next.setFilteredOut(true);
                        }
                        if (str != null) {
                            placeCandidate.setFilteredOut(true);
                            this.resolvedPersons.put(placeCandidate.getTextnorm(), next.getText());
                            placeCandidate.addRule(str);
                            break;
                        }
                    }
                    if (!placeCandidate.isFilteredOut() && placeCandidate.getLength() <= LONG_NAME_LEN) {
                        for (TaxonMatch taxonMatch : list3) {
                            if (placeCandidate.isSameMatch(taxonMatch)) {
                                placeCandidate.setFilteredOut(true);
                                this.resolvedOrgs.put(placeCandidate.getTextnorm(), taxonMatch.getText());
                                placeCandidate.addRule("ResolvedOrg");
                            } else if (placeCandidate.isWithin(taxonMatch) && !placeCandidate.isCountry) {
                                placeCandidate.setFilteredOut(true);
                                this.resolvedOrgs.put(placeCandidate.getTextnorm(), taxonMatch.getText());
                                placeCandidate.addRule(NAME_IN_ORG_RULE);
                            } else if (taxonMatch.isWithin(placeCandidate)) {
                                taxonMatch.setFilteredOut(true);
                                placeCandidate.addRule("Contains.OrgName");
                            }
                        }
                    }
                }
            }
        }
    }

    @Override // org.opensextant.extractors.geo.rules.GeocodeRule
    public boolean evaluateNameFilterOnly(PlaceCandidate placeCandidate) {
        if (placeCandidate.isCountry) {
            return true;
        }
        if (NameCodeRule.isRuleFor(placeCandidate)) {
            placeCandidate.setFilteredOut(false);
            return true;
        }
        if (MajorPlaceRule.isRuleFor(placeCandidate)) {
            placeCandidate.setFilteredOut(false);
            return true;
        }
        if (this.resolvedPersons.containsKey(placeCandidate.getTextnorm())) {
            placeCandidate.setFilteredOut(true);
            placeCandidate.addRule("ResolvedPerson.CoRef");
            return true;
        }
        if (this.resolvedOrgs.containsKey(placeCandidate.getTextnorm())) {
            placeCandidate.setFilteredOut(true);
            placeCandidate.addRule("ResolvedOrg.CoRef");
            return true;
        }
        String[] prematchTokens = placeCandidate.getPrematchTokens();
        if (prematchTokens != null && prematchTokens.length > 0) {
            String lowerCase = prematchTokens[prematchTokens.length - 1].toLowerCase();
            if (StringUtils.isNotBlank(lowerCase)) {
                if (this.titles.contains(withoutPeriod(lowerCase))) {
                    placeCandidate.setFilteredOut(true);
                    this.resolvedPersons.put(val(lowerCase, placeCandidate.getTextnorm()), placeCandidate.getText());
                    placeCandidate.addRule("PersonTitle");
                    placeCandidate.addRule("Prefix=" + lowerCase);
                    return true;
                }
                if (this.nameFilter.contains(lowerCase)) {
                    placeCandidate.setFilteredOut(true);
                    this.resolvedPersons.put(placeCandidate.getTextnorm(), String.format("%s %s", lowerCase, placeCandidate.getTextnorm()));
                    placeCandidate.addRule("PersonName");
                    placeCandidate.addRule("Prefix=" + lowerCase);
                    return true;
                }
            }
        }
        if (this.nameFilter.contains(placeCandidate.getTextnorm())) {
            placeCandidate.setFilteredOut(true);
            this.resolvedPersons.put(placeCandidate.getTextnorm(), placeCandidate.getText());
            placeCandidate.addRule("PersonName");
            return true;
        }
        String[] postmatchTokens = placeCandidate.getPostmatchTokens();
        if (postmatchTokens == null || postmatchTokens.length <= 0) {
            return true;
        }
        String lowerCase2 = postmatchTokens[0].toLowerCase();
        if (!this.suffixes.contains(withoutPeriod(lowerCase2))) {
            return true;
        }
        placeCandidate.setFilteredOut(true);
        this.resolvedPersons.put(val(placeCandidate.getTextnorm(), lowerCase2), placeCandidate.getText());
        placeCandidate.addRule("PersonSuffix");
        return true;
    }

    @Override // org.opensextant.extractors.geo.rules.GeocodeRule
    public void evaluate(PlaceCandidate placeCandidate, Place place) {
    }

    private String val(String str, String str2) {
        return String.format("%s/%s", str, str2);
    }
}
