package org.wikibrain.cookbook.spatial;

import com.vividsolutions.jts.geom.Geometry;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.core.cmd.Env;
import org.wikibrain.core.cmd.EnvBuilder;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.LocalPageDao;
import org.wikibrain.core.dao.RawPageDao;
import org.wikibrain.core.dao.UniversalPageDao;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.model.LocalPage;
import org.wikibrain.spatial.core.dao.SpatialDataDao;
import org.wikibrain.utils.WpIOUtils;

/* loaded from: input_file:org/wikibrain/cookbook/spatial/CitationAnalyzer.class */
/**
 * Exports, for one language edition, a tab-separated listing of geotagged
 * articles, the country whose geometry contains them, and the URLs found in
 * each article's raw body.
 *
 * <p>Country geometries are read once from the {@code "country"} spatial layer
 * in the constructor; article geometries are read from the {@code "wikidata"}
 * layer each time {@link #createCsv(File)} runs.
 */
public class CitationAnalyzer {
    /**
     * URL matcher used by {@link #extractUrls(String)}. Compiled once because
     * the expression is large and the method is invoked for every article.
     * The expression itself is unchanged from the original implementation.
     * NOTE(review): it contains nested quantifiers, so pathological input could
     * backtrack heavily - confirm this is acceptable for the page bodies used.
     */
    private static final Pattern URL_PATTERN = Pattern.compile("\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www.)(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum|travel|[a-z]{2}))(:[\\d]{1,5})?(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b");

    private final Language language;
    private final LocalPageDao pageDao;
    private final SpatialDataDao spatialDao;
    private final UniversalPageDao conceptDao;
    private final RawPageDao rawPageDao;

    /** Country pages that could be resolved in {@link #language}, mapped to their geometry. */
    private final Map<LocalPage, Geometry> countries = new HashMap<LocalPage, Geometry>();

    /**
     * Looks up the required DAOs from the environment's configurator and
     * caches every geometry of the {@code "country"} layer whose id resolves
     * to a local page in the given language.
     *
     * @param env      environment whose configurator supplies the DAOs
     * @param language language edition to analyze
     * @throws ConfigurationException if a DAO cannot be obtained from the configurator
     * @throws DaoException           if reading the country layer or resolving a page fails
     */
    public CitationAnalyzer(Env env, Language language) throws ConfigurationException, DaoException {
        this.language = language;
        this.pageDao = (LocalPageDao) env.getConfigurator().get(LocalPageDao.class);
        this.spatialDao = (SpatialDataDao) env.getConfigurator().get(SpatialDataDao.class);
        this.conceptDao = (UniversalPageDao) env.getConfigurator().get(UniversalPageDao.class);
        this.rawPageDao = (RawPageDao) env.getConfigurator().get(RawPageDao.class);

        Map<Integer, Geometry> countryGeometries = this.spatialDao.getAllGeometriesInLayer("country");
        for (Map.Entry<Integer, Geometry> entry : countryGeometries.entrySet()) {
            LocalPage countryPage = getLocalPage(entry.getKey());
            // Countries without a page in this language edition are left out.
            if (countryPage != null) {
                this.countries.put(countryPage, entry.getValue());
            }
        }
    }

    /**
     * Writes the report to {@code file} as tab-separated text.
     *
     * <p>After a header line, every geometry in the {@code "wikidata"} layer
     * that resolves to a local page and lies inside a cached country geometry
     * produces one row whose {@code url} column is the literal {@code NULL},
     * followed by one additional row per URL extracted from the page's raw
     * body (whitespace runs in the URL collapsed to a single space and
     * trimmed). Geometries that fail either lookup are skipped.
     *
     * <p>The writer is closed even when a lookup or a write fails part-way.
     *
     * @param file destination file
     * @throws DaoException if a DAO lookup fails
     * @throws IOException  if the file cannot be opened or written
     */
    public void createCsv(File file) throws DaoException, IOException {
        Map<Integer, Geometry> articleGeometries = this.spatialDao.getAllGeometriesInLayer("wikidata");
        BufferedWriter writer = WpIOUtils.openWriter(file);
        try {
            writer.write(StringUtils.join(Arrays.asList("language", "articleId", "articleTitle", "articleLat", "articleLong", "countryId", "countryTitle", "countryLat", "countryLong", "url"), "\t") + "\n");

            for (Map.Entry<Integer, Geometry> entry : articleGeometries.entrySet()) {
                LocalPage article = getLocalPage(entry.getKey());
                if (article == null) {
                    // No page in this language: skip before the costly containment scan.
                    continue;
                }
                Geometry articleGeometry = entry.getValue();
                LocalPage country = getContainingCountry(articleGeometry);
                if (country == null) {
                    continue;
                }
                Geometry countryGeometry = this.countries.get(country);

                // NOTE(review): the "Lat" columns receive the centroid X and the
                // "Long" columns the centroid Y, exactly as before. If the layers
                // store (longitude, latitude) these are swapped - confirm the
                // axis order before relying on the column names.
                String rowPrefix = StringUtils.join(Arrays.asList(
                        this.language.getLangCode(),
                        "" + article.getLocalId(),
                        "" + article.getTitle(),
                        "" + articleGeometry.getCentroid().getX(),
                        "" + articleGeometry.getCentroid().getY(),
                        "" + country.getLocalId(),
                        "" + country.getTitle(),
                        "" + countryGeometry.getCentroid().getX(),
                        "" + countryGeometry.getCentroid().getY()), "\t") + "\t";

                // Placeholder row so the article appears even when it cites nothing.
                writer.write(rowPrefix + "NULL" + "\n");

                String body = this.rawPageDao.getById(this.language, article.getLocalId()).getBody();
                for (String url : extractUrls(body)) {
                    // Collapse embedded whitespace so a URL cannot break the row layout.
                    writer.write(rowPrefix + url.replaceAll("\\s+", " ").trim() + "\n");
                }
            }
        } finally {
            writer.close();
        }
    }

    /**
     * Returns the first cached country (in map iteration order) whose geometry
     * contains {@code geometry}, or {@code null} when none does.
     */
    private LocalPage getContainingCountry(Geometry geometry) {
        for (Map.Entry<LocalPage, Geometry> entry : this.countries.entrySet()) {
            if (entry.getValue().contains(geometry)) {
                return entry.getKey();
            }
        }
        return null;
    }

    /**
     * Collects every substring of {@code str} matched by the URL pattern, in
     * order of appearance.
     *
     * @param str text to scan; {@code null} is treated as containing no URLs
     * @return a new mutable list of the matched substrings, never {@code null}
     */
    public static List<String> extractUrls(String str) {
        List<String> urls = new ArrayList<String>();
        if (str == null) {
            return urls;
        }
        Matcher matcher = URL_PATTERN.matcher(str);
        while (matcher.find()) {
            urls.add(matcher.group());
        }
        return urls;
    }

    /**
     * Resolves an id from a spatial layer to a page in {@link #language}.
     *
     * @param i id taken from a spatial layer's key set
     * @return the page, or {@code null} when the concept DAO reports a
     *         negative local id
     * @throws DaoException if either DAO lookup fails
     */
    private LocalPage getLocalPage(int i) throws DaoException {
        // NOTE(review): the meaning of the trailing argument 1 is not visible
        // here (presumably a mapping-algorithm id) - confirm against UniversalPageDao.
        int localId = this.conceptDao.getLocalId(this.language, i, 1);
        if (localId < 0) {
            return null;
        }
        return this.pageDao.getById(this.language, localId);
    }

    /**
     * Command-line entry point: builds the environment from {@code strArr} and
     * writes the Simple English report to {@code citations.tsv} in the working
     * directory.
     */
    public static void main(String[] strArr) throws Exception {
        new CitationAnalyzer(EnvBuilder.envFromArgs(strArr), Language.SIMPLE).createCsv(new File("citations.tsv"));
    }
}
