package nlp4j.indexer;

import java.lang.invoke.MethodHandles;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import nlp4j.Document;
import nlp4j.Keyword;
import nlp4j.counter.Counter;
import nlp4j.impl.DefaultKeyword;
import nlp4j.util.DateUtils;
import nlp4j.util.FacetUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/* loaded from: input_file:nlp4j/indexer/SimpleDocumentIndex.class */
/**
 * In-memory document index.
 *
 * <p>For every added document it maintains: the document itself (by id), per-attribute value
 * counts, per-keyword and per-facet keyword counts, a keyword-to-document-id map and, once the
 * {@value #KEY_DATEFIELD} property has been set, date histograms by year, month and day.
 *
 * <p>NOTE(review): state is held in plain {@code HashMap}/{@code ArrayList} instances without any
 * synchronization, so instances are presumably meant for single-threaded use - confirm with
 * callers.
 */
public class SimpleDocumentIndex extends AbstractDocumentIndexer implements DocumentIndexer {
    private static final Logger logger = LogManager.getLogger(MethodHandles.lookup().lookupClass());

    /** Property key; the expected value is {@code "<date attribute name>,<date pattern>"}. */
    public static final String KEY_DATEFIELD = "datefield";

    /** Document id to document (documents without an id are not stored here). */
    private final HashMap<String, Document> map_docid_document = new HashMap<>();
    /** Facet path to (lex to count). */
    private final HashMap<String, HashMap<String, Long>> mapFacetKeywordCount = new HashMap<>();
    /** Attribute key to counter of the attribute's string values. */
    private final HashMap<String, Counter<String>> mapItemCount = new HashMap<>();
    /** Keyword to number of documents in which it was counted. */
    private final HashMap<Keyword, Long> keywordCount = new HashMap<>();
    /** Every keyword instance passed to {@link #countKeyword(Keyword)}, in insertion order. */
    private final ArrayList<Keyword> keywords = new ArrayList<>();
    /** Ids of all added documents that had an id, in insertion order. */
    private final List<String> docids = new ArrayList<>();
    /** {@code facet + "." + lex} to the ids of the documents containing that keyword. */
    private final HashMap<String, List<String>> mapKeywordDocumentids = new HashMap<>();

    // Date histograms; left package-private and non-final exactly as before.
    Counter<String> dateCounterYYYY = new Counter<>();
    Counter<String> dateCounterYYYYMM = new Counter<>();
    Counter<String> dateCounterYYYYMMDD = new Counter<>();

    /** Name of the attribute holding the document date, or {@code null} when not configured. */
    private String dateField = null;
    /** {@link SimpleDateFormat} pattern used to parse {@link #dateField} values. */
    private String dateFieldFormat = null;

    /**
     * Adds a document to the index and updates all counters.
     *
     * <p>A document whose id is {@code null} is still counted (attributes, keywords, dates) but it
     * is not stored and is not recorded in the keyword-to-document-id map, because it could never
     * be looked up again.
     *
     * @param document the document to index
     */
    @Override // nlp4j.indexer.DocumentIndexer
    public void addDocument(Document document) {
        String id = document.getId();
        if (id != null) {
            this.docids.add(id);
            this.map_docid_document.put(id, document);
        } else {
            logger.warn("ID is null: " + document);
        }
        countAttributes(document);
        indexKeywords(document, id);
        countDate(document);
    }

    /** Counts every non-null attribute value of the document under its attribute key. */
    private void countAttributes(Document document) {
        for (String key : document.getAttributeKeys()) {
            String value = document.getAttributeAsString(key);
            if (value != null) {
                this.mapItemCount.computeIfAbsent(key, k -> new Counter<String>()).add(value);
            }
        }
    }

    /** Counts the document's keywords and records the document id for each keyword. */
    private void indexKeywords(Document document, String id) {
        // A lex is counted at most once per document, however often it occurs.
        // NOTE(review): de-duplication is by lex only, so the same lex under a second facet of the
        // same document is not counted again - confirm this is intended.
        HashSet<String> countedLexes = new HashSet<>();
        for (Keyword keyword : document.getKeywords()) {
            if (countedLexes.add(keyword.getLex())) {
                countKeyword(keyword);
            }
            if (id == null) {
                // Without an id there is nothing meaningful to record for later lookups.
                continue;
            }
            List<String> ids = this.mapKeywordDocumentids
                    .computeIfAbsent(keywordKey(keyword), k -> new ArrayList<String>());
            if (!ids.contains(id)) {
                ids.add(id);
            }
        }
    }

    /** Updates the year/month/day histograms when a date field is configured and present. */
    private void countDate(Document document) {
        if (this.dateField == null) {
            return;
        }
        String rawDate = document.getAttributeAsString(this.dateField);
        if (rawDate == null) {
            return;
        }
        // SimpleDateFormat is not thread-safe, so fresh instances are created per call.
        SimpleDateFormat parser = new SimpleDateFormat(this.dateFieldFormat);
        try {
            Date date = parser.parse(rawDate);
            this.dateCounterYYYY.add(new SimpleDateFormat("yyyy").format(date));
            this.dateCounterYYYYMM.add(new SimpleDateFormat("yyyyMM").format(date));
            this.dateCounterYYYYMMDD.add(new SimpleDateFormat("yyyyMMdd").format(date));
        } catch (ParseException e) {
            // Best effort: an unparsable date must not abort indexing of the document.
            logger.warn("Cannot parse date '" + rawDate + "' with pattern '" + this.dateFieldFormat + "'", e);
        }
    }

    /** Builds the key used in {@link #mapKeywordDocumentids}: {@code facet + "." + lex}. */
    private static String keywordKey(Keyword keyword) {
        return keyword.getFacet() + "." + keyword.getLex();
    }

    /**
     * Returns how often the keyword has been counted.
     *
     * @param keyword the keyword to look up
     * @return the stored count, or {@code 0} when the keyword is unknown
     */
    public long getKeywordCount(Keyword keyword) {
        return this.keywordCount.getOrDefault(keyword, 0L);
    }

    /**
     * Registers one occurrence of the keyword in the global count and in the per-facet count of
     * every path returned by {@code FacetUtils.splitFacetPath} for the keyword's facet.
     */
    private void countKeyword(Keyword keyword) {
        this.keywords.add(keyword);
        Long previous = this.keywordCount.get(keyword);
        if (previous == null) {
            this.keywordCount.put(keyword, 1L);
        } else {
            long updated = previous.longValue() + 1;
            keyword.setCount(updated);
            this.keywordCount.put(keyword, updated);
        }
        String lex = keyword.getLex();
        for (String facetPath : FacetUtils.splitFacetPath(keyword.getFacet())) {
            HashMap<String, Long> lexCounts = this.mapFacetKeywordCount.get(facetPath);
            if (lexCounts == null) {
                lexCounts = new HashMap<>();
                lexCounts.put(lex, 1L);
                logger.debug("put: new keyword(1)");
                this.mapFacetKeywordCount.put(facetPath, lexCounts);
                logger.debug("put: new facet count");
                continue;
            }
            Long current = lexCounts.get(lex);
            if (current == null) {
                lexCounts.put(lex, 1L);
                logger.debug("put: new keyword(2)");
            } else {
                lexCounts.put(lex, current.longValue() + 1);
                logger.debug("increment: keyword");
            }
        }
    }

    /**
     * Returns the date histogram for the given granularity, covering every calendar value between
     * the smallest and the largest counted date.
     *
     * @param str granularity, case-insensitive: {@code "yyyy"}, {@code "yyyyMM"} or
     *            {@code "yyyyMMdd"}
     * @return one keyword per calendar value (lex = value, facet = configured date field,
     *         count = number of documents); empty when the granularity is unknown or
     *         {@code null}, or when nothing has been counted
     */
    public List<Keyword> getDateCount(String str) {
        List<Keyword> result = new ArrayList<>();
        Counter<String> counter;
        String pattern;
        // The integers are handed to DateUtils.getCalendarValues unchanged.
        // NOTE(review): 1/2/5 coincide with java.util.Calendar.YEAR/MONTH/DAY_OF_MONTH - confirm.
        int unit;
        if ("YYYY".equalsIgnoreCase(str)) {
            counter = this.dateCounterYYYY;
            pattern = "yyyy";
            unit = 1;
        } else if ("YYYYMM".equalsIgnoreCase(str)) {
            counter = this.dateCounterYYYYMM;
            pattern = "yyyyMM";
            unit = 2;
        } else if ("YYYYMMDD".equalsIgnoreCase(str)) {
            counter = this.dateCounterYYYYMMDD;
            pattern = "yyyyMMdd";
            unit = 5;
        } else {
            return result;
        }
        List<String> counted = counter.getObjectListSorted();
        if (counted == null || counted.isEmpty()) {
            return result;
        }
        // Only the range boundaries are needed; min/max avoids re-sorting the counter's list.
        String first = Collections.min(counted);
        String last = Collections.max(counted);
        for (String value : DateUtils.getCalendarValues(first, last, pattern, unit)) {
            DefaultKeyword entry = new DefaultKeyword();
            entry.setLex(value);
            entry.setFacet(this.dateField);
            entry.setCount(counter.getCount(value));
            result.add(entry);
        }
        return result;
    }

    /** @return the per-day histogram, see {@link #getDateCount(String)} */
    public List<Keyword> getDateCountDay() {
        return getDateCount("YYYYMMDD");
    }

    /** @return the per-month histogram, see {@link #getDateCount(String)} */
    public List<Keyword> getDateCountMonth() {
        return getDateCount("YYYYMM");
    }

    /** @return the per-year histogram, see {@link #getDateCount(String)} */
    public List<Keyword> getDateCountYear() {
        return getDateCount("YYYY");
    }

    /** @return the live internal list of document ids (not a copy), in insertion order */
    public List<String> getDocumentIds() {
        return this.docids;
    }

    /** @return the number of ids recorded by {@link #addDocument(Document)} */
    public long getDocumentCount() {
        return this.docids.size();
    }

    /**
     * Returns the ids of the documents that contain the keyword (matched by facet and lex).
     *
     * @param keyword the keyword to look up
     * @return the internal id list for the keyword, or a new empty list when it is unknown
     */
    public List<String> getDocumentidsByKeyword(Keyword keyword) {
        List<String> ids = this.mapKeywordDocumentids.get(keywordKey(keyword));
        return ids != null ? ids : new ArrayList<String>();
    }

    /**
     * Returns the stored documents that contain the keyword (matched by facet and lex).
     *
     * @param keyword the keyword to look up
     * @return a new list of matching documents; empty when the keyword is unknown
     */
    public List<Document> getDocumentsByKeyword(Keyword keyword) {
        List<Document> documents = new ArrayList<>();
        List<String> ids = this.mapKeywordDocumentids.get(keywordKey(keyword));
        if (ids == null) {
            return documents;
        }
        for (String id : ids) {
            Document document = this.map_docid_document.get(id);
            if (document != null) {
                documents.add(document);
            }
        }
        return documents;
    }

    /**
     * @param str the document id
     * @return the stored document, or {@code null} when no document has that id
     */
    public Document getDocumentById(String str) {
        return this.map_docid_document.get(str);
    }

    /** @return the number of stored documents (distinct non-null ids) */
    public int getDocumentSize() {
        return this.map_docid_document.size();
    }

    /**
     * Returns the value counts of one document attribute.
     *
     * @param str the attribute key
     * @return one keyword per distinct value (lex = value, count = occurrences); empty when the
     *         attribute has never been seen
     */
    public List<Keyword> getItemCount(String str) {
        List<Keyword> result = new ArrayList<>();
        Counter<String> counter = this.mapItemCount.get(str);
        if (counter == null) {
            return result;
        }
        for (String value : counter.getObjectList()) {
            DefaultKeyword entry = new DefaultKeyword();
            entry.setLex(value);
            entry.setCount(counter.getCount(value));
            result.add(entry);
        }
        return result;
    }

    /**
     * Returns all counted keywords, most frequent first. The count of each returned keyword
     * instance is set to its stored count.
     */
    @Override // nlp4j.indexer.AbstractDocumentIndexer, nlp4j.indexer.DocumentIndexer
    public List<Keyword> getKeywords() {
        List<Keyword> counted = new ArrayList<>(this.keywordCount.keySet());
        for (Keyword keyword : counted) {
            keyword.setCount(this.keywordCount.get(keyword).longValue());
        }
        long start = System.currentTimeMillis();
        Comparator<Keyword> byCount = Comparator.comparing(Keyword::getCount);
        List<Keyword> sorted = counted.stream().sorted(byCount.reversed()).collect(Collectors.toList());
        logger.info("sort_time: " + (System.currentTimeMillis() - start));
        return sorted;
    }

    /**
     * Returns the keywords counted under one facet path.
     *
     * @param str the facet path
     * @return new keyword objects (lex, str, count and facet set); empty when the facet is unknown
     */
    @Override // nlp4j.indexer.AbstractDocumentIndexer, nlp4j.indexer.DocumentIndexer
    public List<Keyword> getKeywords(String str) {
        List<Keyword> result = new ArrayList<>();
        HashMap<String, Long> lexCounts = this.mapFacetKeywordCount.get(str);
        if (lexCounts == null) {
            return result;
        }
        lexCounts.forEach((lex, count) -> {
            DefaultKeyword entry = new DefaultKeyword();
            entry.setLex(lex);
            entry.setStr(lex);
            entry.setCount(count.longValue());
            entry.setFacet(str);
            result.add(entry);
        });
        return result;
    }

    /**
     * Returns the keywords of one facet restricted to the documents whose attribute matches a
     * condition, ordered by descending correlation.
     *
     * <p>Correlation is {@code (count in subset / count in index) / (subset size / index size)},
     * evaluated in floating point.
     *
     * @param str  the facet path
     * @param str2 condition of the form {@code "attribute=value"}; {@code null} means no
     *             restriction, which is the same as {@link #getKeywords(String)}
     * @return keywords of the matching documents with their correlation set
     * @throws IllegalArgumentException when the condition has no {@code attribute=value} shape
     */
    @Override // nlp4j.indexer.AbstractDocumentIndexer, nlp4j.indexer.DocumentIndexer
    public List<Keyword> getKeywords(String str, String str2) {
        if (str2 == null) {
            return getKeywords(str);
        }
        String[] condition = str2.split("=");
        if (condition.length < 2) {
            throw new IllegalArgumentException("Condition must look like 'attribute=value': " + str2);
        }
        String attributeName = condition[0];
        String attributeValue = condition[1];

        List<Document> matching = new ArrayList<>();
        for (Document document : this.map_docid_document.values()) {
            // Documents lacking the attribute simply do not match.
            Object value = document.getAttribute(attributeName);
            if (value != null && value.equals(attributeValue)) {
                matching.add(document);
            }
        }

        // Re-index only the matching documents to obtain their keyword counts.
        SimpleDocumentIndex subIndex = new SimpleDocumentIndex();
        subIndex.addDocuments(matching);
        List<Keyword> subKeywords = subIndex.getKeywords(str);

        double matchingRatio = (double) matching.size() / this.map_docid_document.size();
        for (Keyword keyword : subKeywords) {
            long totalCount = this.mapFacetKeywordCount.get(keyword.getFacet()).get(keyword.getLex()).longValue();
            keyword.setCorrelation(((double) keyword.getCount() / totalCount) / matchingRatio);
        }
        Comparator<Keyword> byCorrelation = Comparator.comparing(Keyword::getCorrelation);
        return subKeywords.stream().sorted(byCorrelation.reversed()).collect(Collectors.toList());
    }

    /** @return the live internal list of every counted keyword instance, in insertion order */
    public List<Keyword> getKeywordsWithoutCount() {
        return this.keywords;
    }

    /**
     * Stores the property in the superclass and, for {@value #KEY_DATEFIELD}, configures the date
     * attribute and its pattern from a {@code "name,pattern"} value. Values without a comma
     * (and {@code null}) leave the date configuration untouched.
     */
    @Override // nlp4j.indexer.AbstractDocumentIndexer, nlp4j.indexer.DocumentIndexer
    public void setProperty(String str, String str2) {
        super.setProperty(str, str2);
        if (!KEY_DATEFIELD.equals(str) || str2 == null) {
            return;
        }
        String[] parts = str2.split(",");
        if (parts.length > 1) {
            this.dateField = parts[0];
            this.dateFieldFormat = parts[1];
        }
    }

    @Override
    public String toString() {
        return "SimpleDocumentIndex [mapDocument.size=" + this.map_docid_document.size() + ", mapKeywordCount.size=" + this.mapFacetKeywordCount.size() + "]";
    }

    /** @return the number of documents that contain the keyword (matched by facet and lex) */
    public long getDocumentCount(Keyword keyword) {
        return getDocumentidsByKeyword(keyword).size();
    }

    /**
     * Returns the keywords that co-occur with the given keyword and whose correlation exceeds a
     * threshold, ordered by descending correlation.
     *
     * <p>Only keywords of the same facet are considered; when the given keyword has no facet, all
     * keywords of the co-occurring documents are considered. Correlation is computed in floating
     * point as {@code (hits / count) / (documentCount / documentSize)}.
     * NOTE(review): the operand order is kept exactly as found in the original expression -
     * confirm it matches the intended formula.
     *
     * @param keyword the keyword whose neighbours are wanted
     * @param d       exclusive lower bound for the correlation
     * @return new keyword objects with count and correlation set
     */
    public List<Keyword> getRelevantKeywords(Keyword keyword, double d) {
        String facet = keyword.getFacet();
        List<String> hitIds = getDocumentidsByKeyword(keyword);

        Counter<String> lexCounter = new Counter<>();
        for (String docId : hitIds) {
            Document document = getDocumentById(docId);
            if (document == null) {
                continue;
            }
            Iterator<Keyword> it = (facet != null ? document.getKeywords(facet) : document.getKeywords()).iterator();
            while (it.hasNext()) {
                lexCounter.add(it.next().getLex());
            }
        }

        long documentSize = getDocumentSize();
        List<Keyword> relevant = new ArrayList<>();
        for (String lex : lexCounter.getObjectList()) {
            long count = lexCounter.getCount(lex);
            DefaultKeyword candidate = new DefaultKeyword(facet, lex);
            candidate.setCount(count);
            // Never divide by a zero document count.
            long documentCount = Math.max(1L, getDocumentCount(candidate));
            double correlation = ((double) hitIds.size() / count) / ((double) documentCount / documentSize);
            candidate.setCorrelation(correlation);
            if (correlation > d) {
                relevant.add(candidate);
            }
        }
        Comparator<Keyword> byCorrelation = Comparator.comparing(Keyword::getCorrelation);
        relevant.sort(byCorrelation.reversed());
        return relevant;
    }

    /**
     * @param keyword the keyword to score
     * @return {@code log10(documentCount / (keywordCount + 1))}
     */
    public double getkeywordIDF(Keyword keyword) {
        return Math.log10(getDocumentCount() / (getKeywordCount(keyword) + 1.0d));
    }

    /**
     * @param keyword the keyword to score
     * @param j       the term count used as numerator of the term frequency
     * @return {@code (j / documentCount) * idf}, evaluated in floating point; {@code 0} for an
     *         empty index
     */
    public double getkeywordTFIDF(Keyword keyword, long j) {
        long documentCount = getDocumentCount();
        if (documentCount == 0) {
            return 0.0d;
        }
        return ((double) j / documentCount) * getkeywordIDF(keyword);
    }
}
