package nlp4j.wiki.util;

import info.bliki.wiki.model.WikiModel;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import nlp4j.Keyword;
import nlp4j.impl.DefaultKeyword;
import nlp4j.wiki.template.WikiTemplateNormalizer;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.sweble.wikitext.engine.ExpansionCallback;
import org.sweble.wikitext.engine.PageId;
import org.sweble.wikitext.engine.PageTitle;
import org.sweble.wikitext.engine.WtEngineImpl;
import org.sweble.wikitext.engine.config.WikiConfigImpl;
import org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.sweble.wikitext.example.TextConverter;

/* loaded from: input_file:nlp4j/wiki/util/WikiUtils.class */
/**
 * Static helpers for working with wiki markup: keyword extraction from wiki
 * text or rendered HTML, header normalization driven by a classpath
 * configuration file, and conversion of wiki text to HTML or plain text.
 */
public class WikiUtils {

    /** Classpath location of the tab-separated header normalization table. */
    private static final String CONFIG_RESOURCE = "nlp4j/wiki/util/WikiUtilsConfig.txt";

    /**
     * Header replacements keyed by {@code "=" + from + "="} with values
     * {@code "=" + to + "="}. Filled once during class initialization and only
     * read afterwards.
     */
    private static final Map<String, String> mapForNormalize = new HashMap<>();

    /** Regex used by {@link #extractSpells(String)} to split the spelling list. */
    static String splitter = "(:|・)";

    private static final String REGEX_JA_WAGOKANJI = "\\{\\{ja-wagokanji\\|(.*?)\\}\\}";
    private static final String REGEX_WAGOKANJI_OF = "\\{\\{wagokanji of\\|(.*?)\\}\\}";
    private static final String REGEX_ALTERNATIVE_FORM_OF = "\\{\\{alternative form of\\|ja\\|(.*?)\\}\\}";

    /**
     * Template patterns probed by {@link #extractKeywordsFromWikiText(String, String)},
     * in the order their matches are appended to the result.
     */
    private static final Pattern[] KEYWORD_PATTERNS = {
        Pattern.compile(REGEX_JA_WAGOKANJI),
        Pattern.compile(REGEX_WAGOKANJI_OF),
        Pattern.compile(REGEX_ALTERNATIVE_FORM_OF),
    };

    /** Matches a single-line {@code {{...}}} span (non-greedy, '.' does not cross line breaks). */
    private static final Pattern PATTERN_TEMPLATE = Pattern.compile("\\{\\{.*?\\}\\}");

    /** Matches a single-line {@code [[...]]} span (non-greedy, '.' does not cross line breaks). */
    private static final Pattern PATTERN_DOUBLE_BRACKET = Pattern.compile("\\[\\[.*?\\]\\]");

    static {
        loadNormalizeConfig();
    }

    /**
     * Loads the header normalization table from {@link #CONFIG_RESOURCE}.
     *
     * <p>Each line holding at least two tab-separated columns contributes one
     * mapping from the first column to the second. Lines without a second
     * column are skipped. Loading is best effort: a missing or unreadable
     * resource is reported on {@code System.err} and leaves whatever was read
     * so far in place, so class initialization never fails because of it.
     */
    private static void loadNormalizeConfig() {
        try (InputStream in = WikiUtils.class.getClassLoader().getResourceAsStream(CONFIG_RESOURCE)) {
            if (in == null) {
                System.err.println("WikiUtils: resource not found on classpath: " + CONFIG_RESOURCE);
                return;
            }
            // Closing the underlying stream (done by try-with-resources) releases the reader as well.
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\t");
                if (columns.length < 2) {
                    // No tab, or nothing after it: skip the line instead of aborting the whole load.
                    continue;
                }
                mapForNormalize.put("=" + columns[0] + "=", "=" + columns[1] + "=");
            }
        } catch (IOException | RuntimeException e) {
            // Deliberately non-fatal: an exception escaping a static initializer
            // would make the whole class unusable.
            System.err.println("WikiUtils: failed to load " + CONFIG_RESOURCE + ": " + e);
        }
    }

    /**
     * Renders wiki text to HTML and extracts one keyword per titled link.
     *
     * <p>The facet of the produced keywords is always {@code "wikilink"}; the
     * {@code str2} argument is currently not used.
     *
     * @param str wiki text to render
     * @param str2 unused
     * @return the extracted keywords, or {@code null} if rendering or parsing threw
     */
    public static List<Keyword> extractKeywordsFromWikiText2(String str, String str2) {
        try {
            return extractKeywordsFromWikiHtml(toHtml(str), "wikilink");
        } catch (Exception e) {
            return null;
        }
    }

    /**
     * Extracts a keyword from every {@code <a>} element whose {@code title}
     * attribute is non-empty and does not start with {@code "Template"}.
     *
     * @param str HTML to parse
     * @param str2 facet assigned to every produced keyword
     * @return keywords in document order; empty if no anchor qualifies
     */
    public static List<Keyword> extractKeywordsFromWikiHtml(String str, String str2) {
        List<Keyword> keywords = new ArrayList<>();
        for (Element anchor : Jsoup.parse(str).select("a")) {
            String title = anchor.attr("title");
            if (title.isEmpty() || title.startsWith("Template")) {
                continue;
            }
            DefaultKeyword keyword = new DefaultKeyword();
            keyword.setLex(title);
            keyword.setStr(title);
            keyword.setFacet(str2);
            keywords.add(keyword);
        }
        return keywords;
    }

    /**
     * Extracts keywords from the first occurrence of each supported template:
     * {@code {{ja-wagokanji|...}}}, {@code {{wagokanji of|...}}} and
     * {@code {{alternative form of|ja|...}}}.
     *
     * @param str wiki text to scan
     * @param str2 first constructor argument passed to each {@link DefaultKeyword}
     * @return at most one keyword per template kind, in the order listed above
     */
    public static List<Keyword> extractKeywordsFromWikiText(String str, String str2) {
        List<Keyword> keywords = new ArrayList<>();
        for (Pattern pattern : KEYWORD_PATTERNS) {
            Matcher matcher = pattern.matcher(str);
            // Only the first match of each pattern is used.
            if (matcher.find()) {
                keywords.add(new DefaultKeyword(str2, matcher.group(1)));
            }
        }
        return keywords;
    }

    /**
     * Extracts the tokens found between the first <code>}}</code> and the next
     * {@code ==} of a header, after removing all space characters, and splits
     * them on {@link #splitter}.
     *
     * @param str header text
     * @return the non-blank tokens, or {@code null} if there is no <code>}}</code>,
     *     no following {@code ==}, or nothing between the two
     */
    public static String[] extractSpells(String str) {
        String compact = str.replace(" ", "");
        int templateEnd = compact.indexOf("}}");
        if (templateEnd == -1) {
            return null;
        }
        int bodyStart = templateEnd + 2;
        int headingStart = compact.indexOf("==", templateEnd);
        if (headingStart == -1 || headingStart == bodyStart) {
            return null;
        }
        List<String> spells = new ArrayList<>();
        for (String token : compact.substring(bodyStart, headingStart).split(splitter)) {
            if (!token.trim().isEmpty()) {
                spells.add(token);
            }
        }
        return spells.toArray(new String[0]);
    }

    /**
     * Prints {@code 漢字} to {@code System.err} when the text contains the
     * {@code ==[[漢字]]==} header. Produces no other result.
     *
     * @param str wiki text to inspect
     */
    public static void getTags(String str) {
        if (str.contains("==[[漢字]]==")) {
            System.err.println("漢字");
        }
    }

    /**
     * Normalizes a single header: removes all space characters, drops any text
     * between the first <code>}}</code> and the following {@code ==}, then applies
     * the configured replacement if the result is a known key.
     *
     * @param str header text
     * @return the mapped header if one is configured, otherwise the cleaned header
     */
    public static String normailzeHeader(String str) {
        String header = str.replace(" ", "");
        int templateEnd = header.indexOf("}}");
        if (templateEnd != -1) {
            int headingStart = header.indexOf("==", templateEnd);
            if (headingStart != -1) {
                header = header.substring(0, templateEnd + 2) + header.substring(headingStart);
            }
        }
        return mapForNormalize.getOrDefault(header, header);
    }

    /**
     * Applies every configured header replacement to the given path, one after
     * another.
     *
     * <p>NOTE(review): replacements run in the map's iteration order, which a
     * {@link HashMap} does not specify; if one mapping's output can match another
     * mapping's key, the result depends on that order.
     *
     * @param str header path, may be {@code null}
     * @return the path with all replacements applied, or {@code null} for {@code null} input
     */
    public static String normailzeHeaderPath(String str) {
        if (str == null) {
            return null;
        }
        String path = str;
        for (Map.Entry<String, String> mapping : mapForNormalize.entrySet()) {
            path = path.replace(mapping.getKey(), mapping.getValue());
        }
        return path;
    }

    /**
     * Renders wiki text to HTML with a {@link WikiModel} that uses empty image
     * and link base URLs.
     *
     * @param str wiki text
     * @return the rendered HTML, or {@code null} if rendering raised an {@link IOException}
     * @throws Exception declared for callers; an {@link IOException} from rendering
     *     is reported via {@code printStackTrace()} and not propagated
     */
    public static String toHtml(String str) throws Exception {
        WikiModel wikiModel = new WikiModel("", "");
        wikiModel.setUp();
        try {
            return wikiModel.render(str);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Converts wiki text to plain text: normalizes templates, strips the
     * remaining single-line {@code {{...}}} spans, and runs the result through
     * the Sweble engine and {@link TextConverter} with a wrap column of 1000.
     *
     * @param str wiki text
     * @return the plain text, or {@code null} if the conversion threw
     */
    public static String toPlainText(String str) {
        String withoutTemplates = PATTERN_TEMPLATE.matcher(WikiTemplateNormalizer.normalize(str)).replaceAll("");
        WikiConfigImpl config = DefaultConfigEnWp.generate();
        try {
            PageId pageId = new PageId(PageTitle.make(config, "Wiki"), -1L);
            WtEngineImpl engine = new WtEngineImpl(config);
            TextConverter converter = new TextConverter(config, 1000);
            return (String) converter.go(
                    engine.postprocess(pageId, withoutTemplates, (ExpansionCallback) null).getPage());
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Like {@link #toPlainText(String)}, then additionally removes every
     * {@code **}, every {@code -}, the literal {@code [[Category:_]]} and any
     * single-line {@code [[...]]} span, and trims the result.
     *
     * @param str wiki text
     * @return the cleaned plain text, or {@code null} if {@link #toPlainText(String)} returned {@code null}
     */
    public static String toPlainText2(String str) {
        String plainText = toPlainText(str);
        if (plainText == null) {
            return null;
        }
        String cleaned = plainText.replace("**", "").replace("-", "").replace("[[Category:_]]", "");
        return PATTERN_DOUBLE_BRACKET.matcher(cleaned).replaceAll("").trim();
    }
}
