package org.opensextant.extractors.geo;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.TreeMap;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.opensextant.ConfigException;
import org.opensextant.data.LatLon;
import org.opensextant.data.Place;
import org.opensextant.data.TextInput;
import org.opensextant.extraction.ExtractionException;
import org.opensextant.extraction.MatchFilter;
import org.opensextant.extraction.SolrMatcherSupport;
import org.opensextant.extraction.TagFilter;
import org.opensextant.util.GeodeticUtility;
import org.opensextant.util.SolrUtil;
import org.opensextant.util.TextUtils;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/opensextant/extractors/geo/GazetteerMatcher.class */
public class GazetteerMatcher extends SolrMatcherSupport {
    // General tag filter; created in the constructor with the stopword filter enabled.
    protected TagFilter filter;
    // Optional caller-supplied filter (see setMatchFilter); null means no user filtering.
    private MatchFilter userfilter;
    // Loaded from "/filters/continent-filter.txt"; matches are flagged as continents in tagText.
    private MatchFilter continents;
    // Per-call counters, reset at the start of every tagText(TextInput, boolean, String) call.
    private long defaultFilterCount;
    private long userFilterCount;
    // Running totals accumulated across tagText calls; read by getFiltrationRatio().
    private long filteredTotal;
    private long matchedTotal;
    // When true, lower-case abbreviation-length matches are allowed through.
    private boolean allowLowercaseAbbrev;
    // NOTE(review): not referenced by name in this file; the literal 20 used in the lower-case
    // length check in tagText presumably corresponds to this constant -- confirm.
    private static final int PHRASE_LEN = 20;
    // When true, lower-case matches are allowed; also drives the tag filter's case sensitivity.
    private boolean allowLowerCase;
    // Gates the candidate-level TagFilter check in tagText.
    private boolean enableCaseFilter;
    // When true, the lower-case text filter and abbreviation assessment in tagText are skipped.
    private boolean enableCodeHunter;
    // Solr request parameters shared by the tagger calls and searchAdvanced.
    private final ModifiableSolrParams params;
    // Gazetteer wrapper created in initialize() from the inherited solr handle.
    private SolrGazetteer gazetteer;
    // Names of the Solr fields that can be passed as the "field" tagging parameter.
    public static final String DEFAULT_TAG_FIELD = "name_tag";
    public static final String CJK_TAG_FIELD = "name_tag_cjk";
    public static final String AR_TAG_FIELD = "name_tag_ar";
    // Language id -> tag field name; populated by the static initializer of this class.
    protected static final HashMap<String, String> lang2nameField = new HashMap<>();
    // First letters checked by assessApostrophe when a match directly follows an apostrophe.
    private static final String CONTRACTIONS = "SsTtDd";

    /**
     * Creates a matcher with the allow-lower-case setting turned off.
     *
     * @throws ConfigException if the delegated constructor fails to set up the matcher
     */
    public GazetteerMatcher() throws ConfigException {
        this(false);
    }

    /**
     * Creates a matcher, loads the continent and tag filters, and issues one throw-away
     * tagging request to prime the tagger.
     *
     * @param z value for the allow-lower-case setting; the tag filter is made case sensitive
     *          exactly when this is {@code false}
     * @throws ConfigException if {@link #initialize()} fails, the continent filter cannot be
     *                         read, or the tag filter / priming request fails
     */
    public GazetteerMatcher(boolean z) throws ConfigException {
        // Defaults are assigned before initialize(): it configures params and replaces gazetteer.
        userfilter = null;
        defaultFilterCount = 0L;
        userFilterCount = 0L;
        filteredTotal = 0L;
        matchedTotal = 0L;
        allowLowercaseAbbrev = false;
        enableCaseFilter = true;
        enableCodeHunter = false;
        params = new ModifiableSolrParams();
        gazetteer = null;
        log = LoggerFactory.getLogger(GazetteerMatcher.class);
        initialize();
        allowLowerCase = z;

        try {
            continents = new MatchFilter("/filters/continent-filter.txt");
        } catch (IOException continentFailure) {
            throw new ConfigException("Could not find continent list.", continentFailure);
        }

        try {
            filter = new TagFilter();
            filter.enableStopwordFilter(true);
            filter.enableCaseSensitive(!allowLowerCase);
            tagText("trivial priming of the solr pump", "__initialization___");
        } catch (ExtractionException | IOException primingFailure) {
            throw new ConfigException("Unable to prime the tagger", primingFailure);
        }
    }

    /**
     * Runs the superclass initialization, then sets the default tagging request parameters
     * and creates the gazetteer wrapper from the inherited {@code solr} handle.
     *
     * @throws ConfigException if the superclass initialization fails
     */
    @Override // org.opensextant.extraction.SolrMatcherSupport
    public void initialize() throws ConfigException {
        super.initialize();

        final ModifiableSolrParams defaults = this.params;
        defaults.set("fl", SolrGazetteer.DEFAULT_FIELDS);
        defaults.set("tagsLimit", SolrMatcherSupport.DEFAULT_TAG_LIMIT);
        defaults.set("rows", SolrMatcherSupport.DEFAULT_TAG_LIMIT);
        defaults.set("subTags", false);
        defaults.set("overlaps", "LONGEST_DOMINANT_RIGHT");

        this.gazetteer = new SolrGazetteer(this.solr);
    }

    /**
     * @return the fixed core name used by this matcher, {@code "gazetteer"}
     */
    @Override // org.opensextant.extraction.SolrMatcherSupport
    public String getCoreName() {
        final String coreName = "gazetteer";
        return coreName;
    }

    /**
     * @return the live (not copied) Solr parameter set held by this matcher
     */
    @Override // org.opensextant.extraction.SolrMatcherSupport
    public SolrParams getMatcherParameters() {
        return params;
    }

    /**
     * @return the gazetteer wrapper created by {@link #initialize()}
     */
    public SolrGazetteer getGazetteer() {
        return gazetteer;
    }

    /**
     * Logs, at INFO level, the JVM memory currently in use (total minus free) in kilobytes.
     */
    public void reportMemory() {
        final Runtime jvm = Runtime.getRuntime();
        final long usedBytes = jvm.totalMemory() - jvm.freeMemory();
        final int usedKilobytes = (int) (usedBytes / 1024);
        log.info("CURRENT MEM USAGE(K)={}", usedKilobytes);
    }

    /**
     * Sets whether lower-case matches of abbreviation length are accepted by the tagger.
     *
     * @param z {@code true} to allow lower-case abbreviations
     */
    public void setAllowLowerCaseAbbreviations(boolean z) {
        allowLowercaseAbbrev = z;
    }

    /**
     * Sets whether lower-case matches are accepted and updates the tag filter to match:
     * the filter is case sensitive exactly when lower case is not allowed.
     *
     * @param z {@code true} to allow lower-case matches
     */
    public void setAllowLowerCase(boolean z) {
        final boolean caseSensitive = !z;
        allowLowerCase = z;
        filter.enableCaseSensitive(caseSensitive);
    }

    /**
     * Turns the candidate-level tag filter check applied during tagging on or off.
     *
     * @param z {@code true} to apply the check
     */
    public void setEnableCaseFilter(boolean z) {
        enableCaseFilter = z;
    }

    /**
     * Turns "code hunter" mode on or off. While on, tagging skips both the lower-case text
     * filter and the abbreviation assessment.
     *
     * @param z {@code true} to enable the mode
     */
    public void setEnableCodeHunter(boolean z) {
        enableCodeHunter = z;
    }

    /**
     * Installs the caller-supplied filter consulted for every candidate during tagging.
     *
     * @param matchFilter the filter to use, or {@code null} for none
     */
    public void setMatchFilter(MatchFilter matchFilter) {
        userfilter = matchFilter;
    }

    /**
     * Searches the gazetteer without a name-length limit.
     *
     * @param str the query text
     * @param z   {@code true} to send {@code str} as-is, {@code false} to quote it as a phrase
     * @return the places built from the query results
     * @throws SolrServerException if the Solr query fails
     * @throws IOException         if communication with Solr fails
     */
    public List<Place> searchAdvanced(String str, boolean z) throws SolrServerException, IOException {
        final int noLengthLimit = -1;
        return searchAdvanced(str, z, noLengthLimit);
    }

    /**
     * Searches the gazetteer and converts each result document into a {@link Place}.
     *
     * <p>The query is sent with a copy of this matcher's parameters so that the ad hoc
     * {@code "q"} value does not remain in the shared parameter set used for tagging.
     *
     * @param str the query text
     * @param z   {@code true} to send {@code str} as-is, {@code false} to wrap it in double
     *            quotes as a phrase
     * @param i   maximum accepted name length; when positive, documents whose {@code "name"}
     *            is longer are skipped; zero or negative disables the limit
     * @return the places built from the accepted result documents, in result order
     * @throws SolrServerException if the Solr query fails
     * @throws IOException         if communication with Solr fails
     */
    public List<Place> searchAdvanced(String str, boolean z, int i) throws SolrServerException, IOException {
        // Copy first: the shared params are also handed to the tagger via getMatcherParameters().
        final ModifiableSolrParams queryParams = new ModifiableSolrParams(this.params);
        queryParams.set("q", z ? str : "\"" + str + "\"");

        final QueryResponse response = this.solr.getInternalSolrClient().query(queryParams, SolrRequest.METHOD.GET);
        final List<Place> places = new ArrayList<>();
        for (SolrDocument doc : response.getResults()) {
            if (i > 0) {
                final String name = SolrUtil.getString(doc, "name");
                if (name != null && name.length() > i) {
                    // Name exceeds the requested maximum length.
                    continue;
                }
            }
            places.add(createPlace(doc));
        }
        return places;
    }

    /**
     * Tags the given text using the default tag field, with places attached to each candidate.
     *
     * @param str  the text to tag
     * @param str2 the identifier passed along with the text
     * @return the place candidates found in the text
     * @throws ExtractionException if tagging fails
     */
    public List<PlaceCandidate> tagText(String str, String str2) throws ExtractionException {
        final boolean tagOnly = false;
        return tagText(str, str2, tagOnly);
    }

    /**
     * Tags the given text using {@link #DEFAULT_TAG_FIELD}.
     *
     * @param str  the text to tag
     * @param str2 the identifier passed along with the text (first {@code TextInput} argument)
     * @param z    when {@code true}, gazetteer places are not attached to the candidates
     * @return the place candidates found in the text
     * @throws ExtractionException if tagging fails
     */
    public List<PlaceCandidate> tagText(String str, String str2, boolean z) throws ExtractionException {
        final TextInput input = new TextInput(str2, str);
        return tagText(input, z, DEFAULT_TAG_FIELD);
    }

    /**
     * Tags the given text using a caller-chosen tag field.
     *
     * @param str  the text to tag
     * @param str2 the identifier passed along with the text (first {@code TextInput} argument)
     * @param z    when {@code true}, gazetteer places are not attached to the candidates
     * @param str3 the value for the {@code "field"} tagging parameter
     * @return the place candidates found in the text
     * @throws ExtractionException if tagging fails
     */
    public List<PlaceCandidate> tagText(String str, String str2, boolean z, String str3) throws ExtractionException {
        final TextInput input = new TextInput(str2, str);
        return tagText(input, z, str3);
    }

    /**
     * Tags the input, choosing the tag field from the input's language id: the field mapped
     * in {@code lang2nameField} when there is one, otherwise {@link #DEFAULT_TAG_FIELD}.
     *
     * @param textInput the text to tag
     * @param z         when {@code true}, gazetteer places are not attached to the candidates
     * @return the place candidates found in the text
     * @throws ExtractionException if tagging fails
     */
    public List<PlaceCandidate> tagText(TextInput textInput, boolean z) throws ExtractionException {
        String languageField = null;
        if (textInput.langid != null) {
            languageField = lang2nameField.get(textInput.langid);
        }
        final String tagField = (languageField != null) ? languageField : DEFAULT_TAG_FIELD;
        return tagText(textInput, z, tagField);
    }

    /**
     * Tags the input text with the Solr tagger and turns the raw tags into filtered
     * {@link PlaceCandidate}s.
     *
     * <p>Each tag passes through a chain of rejection checks (span length, punctuation,
     * contraction fragments, lower-case text, formatting whitespace, tag filter, user filter,
     * candidate-level filter). Continent matches are kept but marked as filtered out. For the
     * remaining tags the referenced gazetteer places are examined and, unless {@code z} is set,
     * attached to the candidate. Per-call filter counters, timing fields and the running
     * totals of this matcher are updated as a side effect.
     *
     * @param textInput the text to tag; its {@code buffer}, {@code id} and {@code langid} are read
     * @param z         when {@code true}, gazetteer places are not attached to candidates
     * @param str       value for the {@code "field"} tagging parameter
     * @return the accepted candidates ordered by start offset; an empty list when the tagger
     *         call left the place map empty
     * @throws ExtractionException if a tag references an id with no place in the map
     */
    public List<PlaceCandidate> tagText(TextInput textInput, boolean z, String str) throws ExtractionException {
        // Per-call counters start from zero; running totals are updated at the end.
        this.defaultFilterCount = 0L;
        this.userFilterCount = 0L;
        long currentTimeMillis = System.currentTimeMillis();
        this.log.debug("TEXT SIZE = {}", Integer.valueOf(textInput.buffer.length()));
        this.params.set("field", new String[]{str});
        // Map handed to the tagger call; places are later looked up in it by tag id.
        HashMap hashMap = new HashMap(100);
        QueryResponse tagTextCallSolrTagger = tagTextCallSolrTagger(textInput.buffer, textInput.id, hashMap);
        if (hashMap.isEmpty()) {
            return new ArrayList();
        }
        // Presumably computes the isLower/isUpper flags read below -- TODO confirm in TextInput.
        textInput.characterize();
        List<NamedList> list = (List) tagTextCallSolrTagger.getResponse().get("tags");
        this.tagNamesTime = tagTextCallSolrTagger.getQTime();
        long j = currentTimeMillis + this.tagNamesTime;
        long currentTimeMillis2 = System.currentTimeMillis();
        // z2 == true means gazetteer places are attached to each candidate.
        boolean z2 = !z;
        this.log.debug("DOC={} TAGS SIZE={}", textInput.id, Integer.valueOf(list.size()));
        // Keyed by start offset: results come out in text order, and a later candidate with
        // the same start offset replaces an earlier one.
        TreeMap treeMap = new TreeMap();
        // Names of matched places, collected only for the debug message.
        HashSet hashSet = new HashSet();
        for (NamedList namedList : list) {
            // z3 stays true while the candidate is still acceptable.
            boolean z3 = true;
            int intValue = ((Integer) namedList.get("startOffset")).intValue();
            int intValue2 = ((Integer) namedList.get("endOffset")).intValue();
            int i = intValue2 - intValue;
            if (i == 1) {
                // Single-character spans are always rejected.
                this.defaultFilterCount++;
            } else {
                String str2 = (String) namedList.get("matchText");
                if (TextUtils.hasIrregularPunctuation(str2)) {
                    this.defaultFilterCount++;
                } else {
                    // Character right after the match, or 0 when the match ends the buffer.
                    char charAt = intValue2 < textInput.buffer.length() ? textInput.buffer.charAt(intValue2) : (char) 0;
                    // Reject matches that look like the tail of a contraction (apostrophe just before).
                    if (intValue <= 0 || !assessApostrophe(textInput.buffer.charAt(intValue - 1), str2)) {
                        // Lower-case filtering applies only when no setting permits lower-case matches.
                        boolean z4 = (this.allowLowerCase || this.allowLowercaseAbbrev || this.enableCodeHunter) ? false : true;
                        if (i <= 20 && z4 && TextUtils.isASCII(str2) && TextUtils.isLower(str2)) {
                            this.defaultFilterCount++;
                        } else if (TextUtils.countFormattingSpace(str2) > 1) {
                            this.defaultFilterCount++;
                        } else {
                            String squeeze_whitespace = TextUtils.squeeze_whitespace(str2);
                            if (this.filter.filterOut(squeeze_whitespace)) {
                                this.defaultFilterCount++;
                            } else {
                                PlaceCandidate placeCandidate = new PlaceCandidate(intValue, intValue2);
                                placeCandidate.setText(squeeze_whitespace);
                                if (this.userfilter != null && this.userfilter.filterOut(placeCandidate.getTextnorm())) {
                                    this.log.debug("User Filter:{}", squeeze_whitespace);
                                    this.userFilterCount++;
                                } else if (this.continents.filterOut(placeCandidate.getTextnorm())) {
                                    // Continents are returned, but flagged and marked as filtered out;
                                    // no gazetteer places are attached.
                                    placeCandidate.isContinent = true;
                                    placeCandidate.setFilteredOut(true);
                                    treeMap.put(Integer.valueOf(placeCandidate.start), placeCandidate);
                                } else if (this.enableCaseFilter && this.filter.filterOut(placeCandidate, textInput)) {
                                    this.defaultFilterCount++;
                                    this.log.debug("STOPWORD {} {}", textInput.langid, placeCandidate.getText());
                                } else {
                                    placeCandidate.inferTextSense(textInput.isLower, textInput.isUpper);
                                    placeCandidate.setSurroundingTokens(textInput.buffer);
                                    List list2 = (List) namedList.get("ids");
                                    hashSet.clear();
                                    boolean z5 = list2.size() > 100;
                                    Iterator it = list2.iterator();
                                    while (true) {
                                        if (!it.hasNext()) {
                                            break;
                                        }
                                        Object next = it.next();
                                        Place place = (Place) hashMap.get(next);
                                        if (place == null) {
                                            throw new ExtractionException(String.format("[Text ID: %s] Place instance not found in-memory for gazetteer tag ID %s", textInput.id, next));
                                        }
                                        // Places for which isCoord is false are skipped entirely.
                                        if (GeodeticUtility.isCoord(place)) {
                                            // NOTE(review): this nested condition has an empty body, so as written
                                            // it has no effect on the result. It looks like a remnant of a skip
                                            // rule for tags with more than 100 ids -- confirm against upstream.
                                            if (z5) {
                                                if (place.isAdministrative() || place.isPopulated()) {
                                                    if (place.getFeatureCode() != null) {
                                                        if (!place.getFeatureCode().endsWith("X") && !place.getFeatureCode().endsWith("H")) {
                                                        }
                                                    }
                                                }
                                            }
                                            this.log.debug("{} = {}", placeCandidate.getText(), place);
                                            // A code entry whose name differs from the matched text (ignoring case)
                                            // rejects the whole candidate, not just this place.
                                            if (place.isCode() && !place.getName().equalsIgnoreCase(placeCandidate.getText())) {
                                                z3 = false;
                                                break;
                                            }
                                            if (placeCandidate.isAbbrevLength()) {
                                                // Lower-case abbreviation-length text survives only if a setting allows it.
                                                if (placeCandidate.isLower()) {
                                                    z3 = this.allowLowercaseAbbrev || this.allowLowerCase;
                                                }
                                                if (!z3) {
                                                    break;
                                                }
                                                if (!this.enableCodeHunter && !placeCandidate.isAbbreviation) {
                                                    assessAbbreviation(placeCandidate, place, charAt, textInput.isUpper);
                                                }
                                            }
                                            if (this.log.isDebugEnabled()) {
                                                hashSet.add(place.getName());
                                            }
                                            if (place.isCountry()) {
                                                placeCandidate.isCountry = true;
                                            }
                                            if (place.isCode() && placeCandidate.isUpper()) {
                                                placeCandidate.isAbbreviation = true;
                                                placeCandidate.isAcronym = true;
                                            }
                                            if (z2) {
                                                place.defaultHierarchicalPath();
                                                ScoredPlace scoredPlace = new ScoredPlace();
                                                scoredPlace.setPlace(place);
                                                placeCandidate.addPlace(scoredPlace);
                                            }
                                        }
                                    }
                                    // NOTE(review): with z == true nothing is attached via addPlace above, so
                                    // acceptance here depends on how hasPlaces() behaves in that case -- confirm.
                                    if (z3 && placeCandidate.hasPlaces()) {
                                        treeMap.put(Integer.valueOf(placeCandidate.start), placeCandidate);
                                        this.log.debug("Text {} matched {}", placeCandidate.getText(), hashSet);
                                    } else {
                                        // Also reached when the candidate was rejected (z3 == false).
                                        this.log.debug("Place has no places={}", placeCandidate.getText());
                                    }
                                }
                            }
                        }
                    } else {
                        this.defaultFilterCount++;
                    }
                }
            }
        }
        long currentTimeMillis3 = System.currentTimeMillis();
        // Time from method start to the end of the tagger call, minus the reported query time.
        this.getNamesTime = (int) (currentTimeMillis2 - j);
        this.totalTime = (int) (currentTimeMillis3 - currentTimeMillis);
        if (this.log.isDebugEnabled()) {
            summarizeExtraction(treeMap.values(), textInput.id);
        }
        // Fold this call's counts into the running totals used by getFiltrationRatio().
        this.filteredTotal += this.defaultFilterCount + this.userFilterCount;
        this.matchedTotal += treeMap.size();
        return new ArrayList(treeMap.values());
    }

    /**
     * Decides whether a match looks like the tail of a contraction or possessive: the
     * character before it is an apostrophe (ASCII {@code '} or U+2019), its first letter is
     * one of {@code CONTRACTIONS}, and its second character is a space.
     *
     * @param c   the character immediately preceding the match
     * @param str the matched text
     * @return {@code true} if the match should be treated as a contraction fragment;
     *         {@code false} otherwise, including when {@code str} is {@code null} or shorter
     *         than two characters
     */
    private static boolean assessApostrophe(char c, String str) {
        // Two characters are inspected below; anything shorter cannot match the pattern.
        if (str == null || str.length() < 2) {
            return false;
        }
        final boolean afterApostrophe = c == '\'' || c == '\u2019';
        return afterApostrophe && CONTRACTIONS.indexOf(str.charAt(0)) >= 0 && str.charAt(1) == ' ';
    }

    /**
     * Marks a candidate as an abbreviation (and possibly an acronym) when the gazetteer
     * place is an abbreviation and the surrounding text supports that reading.
     *
     * @param placeCandidate the candidate to update in place
     * @param place          the gazetteer place matched by the candidate
     * @param c              the character following the match, or 0 when there is none
     * @param z              whether the whole input is upper case; suppresses the acronym rule
     */
    private void assessAbbreviation(PlaceCandidate placeCandidate, Place place, char c, boolean z) {
        if (c == 0 || !place.isAbbreviation()) {
            return;
        }

        if (c == '.') {
            // A trailing period belongs to the abbreviation: extend the span and text by one.
            placeCandidate.isAbbreviation = true;
            placeCandidate.end += 1;
            placeCandidate.setTextOnly(placeCandidate.getText() + ".");
            return;
        }

        if (placeCandidate.getText().contains(".")) {
            placeCandidate.isAbbreviation = true;
            return;
        }

        final boolean mixedCaseInput = !z;
        if (mixedCaseInput && placeCandidate.isUpper() && placeCandidate.isAbbrevLength()) {
            placeCandidate.isAbbreviation = true;
            placeCandidate.isAcronym = true;
        }
    }

    /**
     * Reports the fraction of tags rejected by filters across all tagging calls so far:
     * {@code filteredTotal / (filteredTotal + matchedTotal)}.
     *
     * @return a value in {@code [0.0, 1.0]}; {@code 0.0} when nothing has been counted yet
     */
    public double getFiltrationRatio() {
        final long total = this.filteredTotal + this.matchedTotal;
        if (total == 0L) {
            // Nothing counted yet; avoid dividing by zero.
            return 0.0;
        }
        // Divide as double: long/long division would truncate the ratio to 0 or 1.
        return (double) this.filteredTotal / total;
    }

    /**
     * Builds the tag object for a Solr document; for this matcher that is a {@link Place}.
     *
     * @param solrDocument the gazetteer document
     * @return the place created by {@link #createPlace(SolrDocument)}
     */
    @Override // org.opensextant.extraction.SolrMatcherSupport
    public Object createTag(SolrDocument solrDocument) {
        final Place place = createPlace(solrDocument);
        return place;
    }

    /**
     * Creates a {@link Place} from a gazetteer document: the {@code "place_id"} and
     * {@code "name"} fields seed the instance, then {@code SolrUtil.populatePlace} fills it.
     *
     * @param solrDocument the gazetteer document
     * @return the populated place
     */
    public static Place createPlace(SolrDocument solrDocument) {
        final String placeId = SolrUtil.getString(solrDocument, "place_id");
        final String name = SolrUtil.getString(solrDocument, "name");
        final Place result = new Place(placeId, name);
        SolrUtil.populatePlace(solrDocument, result);
        return result;
    }

    /**
     * Logs a debug summary of the candidates found for one document: how often each
     * normalized, lower-cased name matched a country, and how often it matched other places.
     *
     * <p>For each candidate its places are scanned in order. The scan stops at the first
     * country (counted once under countries); every non-country place seen before that adds
     * one to the name's place count. Candidates whose text normalizes to {@code null} are
     * counted under the key {@code "null"} in the country map.
     *
     * @param collection the candidates to summarize; {@code null} is reported as an error
     * @param str        the document id used in the log output
     */
    private void summarizeExtraction(Collection<PlaceCandidate> collection, String str) {
        if (collection == null) {
            this.log.error("Something is very wrong.");
            return;
        }
        this.log.debug("DOC={} PLACE CANDIDATES SIZE = {}", str, collection.size());

        final HashMap<String, Integer> countryCounts = new HashMap<>();
        final HashMap<String, Integer> placeCounts = new HashMap<>();
        for (PlaceCandidate candidate : collection) {
            final String normalized = TextUtils.normalizeTextEntity(candidate.getText());
            if (normalized == null) {
                countryCounts.merge("null", 1, Integer::sum);
                continue;
            }
            final String nameKey = normalized.toLowerCase();
            for (ScoredPlace scored : candidate.getPlaces()) {
                if (scored.getPlace().isCountry()) {
                    // merge() stores the incremented count back into the map.
                    countryCounts.merge(nameKey, 1, Integer::sum);
                    break;
                }
                placeCounts.merge(nameKey, 1, Integer::sum);
            }
        }

        this.log.debug("Countries found: {}", countryCounts);
        this.log.debug("Places found: {}", placeCounts);
    }

    /**
     * Looks up gazetteer places at the given location by delegating to the gazetteer with a
     * fixed second argument of 50.
     *
     * @param latLon the location to search at
     * @return the places returned by the gazetteer
     * @throws SolrServerException if the Solr query fails
     * @throws IOException         if communication with Solr fails
     * @deprecated kept for existing callers; the replacement is not visible in this file
     */
    @Deprecated
    public List<Place> placesAt(LatLon latLon) throws SolrServerException, IOException {
        final int fixedArgument = 50;
        return gazetteer.placesAt(latLon, fixedArgument);
    }

    // Registers the language ids that use a dedicated tag field instead of the default one.
    static {
        for (String cjkLang : new String[] {"zh", "zt", "ja", "ko"}) {
            lang2nameField.put(cjkLang, CJK_TAG_FIELD);
        }
        for (String arabicScriptLang : new String[] {"ar", "fa", "ur"}) {
            lang2nameField.put(arabicScriptLang, AR_TAG_FIELD);
        }
    }
}
