package org.opensextant.extractors.xtax;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.opensextant.ConfigException;
import org.opensextant.data.Taxon;
import org.opensextant.data.TextInput;
import org.opensextant.extraction.ExtractionException;
import org.opensextant.extraction.Extractor;
import org.opensextant.extraction.SolrMatcherSupport;
import org.opensextant.extraction.TextMatch;
import org.opensextant.util.SolrProxy;
import org.opensextant.util.SolrUtil;
import org.opensextant.util.TextUtils;

/* loaded from: input_file:org/opensextant/extractors/xtax/TaxonMatcher.class */
public class TaxonMatcher extends SolrMatcherSupport implements Extractor {
    private static ModifiableSolrParams params = new ModifiableSolrParams();
    private boolean tagAll = true;
    private boolean filterNonAcronyms = true;
    public Set<String> catalogs = new HashSet();
    private Set<String> taxonExclusionFilter = new HashSet();
    private TaxonFilter ruleFilter = new TaxonFilter();

    public TaxonMatcher() throws IOException, ConfigException {
        configure();
    }

    public void cleanup() {
        close();
    }

    @Override // org.opensextant.extraction.SolrMatcherSupport
    public String getCoreName() {
        return "taxcat";
    }

    @Override // org.opensextant.extraction.SolrMatcherSupport
    public SolrParams getMatcherParameters() {
        return params;
    }

    @Override // org.opensextant.extraction.SolrMatcherSupport
    public Object createTag(SolrDocument solrDocument) {
        String string = SolrProxy.getString(solrDocument, "catalog");
        if (!this.tagAll && !this.catalogs.contains(string)) {
            return null;
        }
        if (!this.taxonExclusionFilter.isEmpty()) {
            String lowerCase = SolrUtil.getString(solrDocument, "taxnode").toLowerCase();
            Iterator<String> it = this.taxonExclusionFilter.iterator();
            while (it.hasNext()) {
                if (lowerCase.startsWith(it.next())) {
                    return null;
                }
            }
        }
        return createTaxon(solrDocument);
    }

    public static Taxon createTaxon(SolrDocument solrDocument) {
        Taxon taxon = new Taxon();
        taxon.name = SolrUtil.getString(solrDocument, "taxnode");
        taxon.isAcronym = "A".equals(SolrUtil.getString(solrDocument, "name_type"));
        taxon.catalog = SolrUtil.getString(solrDocument, "catalog");
        taxon.addTerm(SolrUtil.getString(solrDocument, "phrase"));
        taxon.addTags(SolrUtil.getStrings(solrDocument, "tag"));
        return taxon;
    }

    public String getName() {
        return "XTax";
    }

    public void configure() throws ConfigException {
        try {
            initialize();
            extract(new TextInput("__initialization___", "trivial priming of the solr pump"));
        } catch (Exception e) {
            throw new ConfigException("Failed to configure TaxMatcher", e);
        }
    }

    public void configure(String str) throws ConfigException {
        throw new ConfigException("Not a valid configuration routine");
    }

    public void configure(URL url) throws ConfigException {
        throw new ConfigException("Not a valid configuration routine");
    }

    public void addCatalogFilters(String[] strArr) {
        this.catalogs.addAll(Arrays.asList(strArr));
        this.tagAll = false;
    }

    public void addCatalogFilter(String str) {
        this.catalogs.add(str);
        this.tagAll = false;
    }

    public void removeFilters() {
        this.catalogs.clear();
        this.taxonExclusionFilter.clear();
        this.tagAll = true;
    }

    public void excludeTaxons(String str) {
        this.taxonExclusionFilter.add(str.toLowerCase());
    }

    public List<TextMatch> extract(String str) throws ExtractionException {
        return extractorImpl(null, str);
    }

    private List<TextMatch> extractorImpl(String str, String str2) throws ExtractionException {
        String str3 = str != null ? str : "no-docid";
        HashMap hashMap = new HashMap(100);
        QueryResponse tagTextCallSolrTagger = tagTextCallSolrTagger(str2, str3, hashMap);
        if (hashMap.isEmpty()) {
            return null;
        }
        ArrayList arrayList = new ArrayList();
        List<NamedList> list = (List) tagTextCallSolrTagger.getResponse().get("tags");
        this.log.debug("TAGS SIZE = {}", Integer.valueOf(list.size()));
        int i = 0;
        String str4 = str3 + "#";
        for (NamedList namedList : list) {
            TaxonMatch taxonMatch = new TaxonMatch();
            taxonMatch.start = ((Integer) namedList.get("startOffset")).intValue();
            taxonMatch.end = ((Integer) namedList.get("endOffset")).intValue();
            i++;
            taxonMatch.match_id = str4 + i;
            if (TextUtils.countFormattingSpace(str2.substring(taxonMatch.start, taxonMatch.end)) <= 1) {
                taxonMatch.setText(str2.substring(taxonMatch.start, taxonMatch.end));
                taxonMatch.setFilteredOut(this.ruleFilter.filterOut(taxonMatch.getText()));
                Iterator it = ((List) namedList.get("ids")).iterator();
                while (it.hasNext()) {
                    Object obj = hashMap.get(it.next());
                    if (obj != null) {
                        Taxon taxon = (Taxon) obj;
                        if (!this.filterNonAcronyms || !taxon.isAcronym || taxonMatch.isUpper()) {
                            taxonMatch.addTaxon(taxon);
                        }
                    }
                }
                if (taxonMatch.hasTaxons()) {
                    arrayList.add(taxonMatch);
                }
            }
        }
        this.log.debug("FOUND LABELS count={}", Integer.valueOf(arrayList.size()));
        return arrayList;
    }

    public List<TextMatch> extract(TextInput textInput) throws ExtractionException {
        return extractorImpl(textInput.id, textInput.buffer);
    }

    public static List<Taxon> search(SolrClient solrClient, String str) throws SolrServerException, IOException {
        ModifiableSolrParams modifiableSolrParams = new ModifiableSolrParams();
        modifiableSolrParams.set("fl", new String[]{"id,catalog,taxnode,phrase,tag,name_type"});
        modifiableSolrParams.set("q", new String[]{str});
        return search(solrClient, (SolrParams) modifiableSolrParams);
    }

    public static List<Taxon> search(SolrClient solrClient, SolrParams solrParams) throws SolrServerException, IOException {
        QueryResponse query = solrClient.query(solrParams, SolrRequest.METHOD.GET);
        ArrayList arrayList = new ArrayList();
        Iterator it = query.getResults().iterator();
        while (it.hasNext()) {
            arrayList.add(createTaxon((SolrDocument) it.next()));
        }
        return arrayList;
    }

    public List<Taxon> search(String str) throws SolrServerException, IOException {
        return search(this.solr.getInternalSolrClient(), str);
    }

    public List<Taxon> search(SolrParams solrParams) throws SolrServerException, IOException {
        return search(this.solr.getInternalSolrClient(), solrParams);
    }

    static {
        params.set("fl", new String[]{"id,catalog,taxnode,phrase,tag,name_type"});
        params.set("tagsLimit", 100000);
        params.set("subTags", false);
        params.set("matchText", false);
        params.set("fq", new String[]{"valid:true"});
        params.set("overlaps", new String[]{"NO_SUB"});
    }
}
