package edu.emory.mathcs.nlp.zzz;

import edu.emory.mathcs.nlp.common.constant.StringConst;
import edu.emory.mathcs.nlp.common.util.CharUtils;
import edu.emory.mathcs.nlp.common.util.IOUtils;
import edu.emory.mathcs.nlp.common.util.Joiner;
import edu.emory.mathcs.nlp.common.util.StringUtils;
import edu.emory.mathcs.nlp.tokenization.EnglishTokenizer;
import edu.emory.mathcs.nlp.tokenization.Tokenizer;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/* loaded from: input_file:edu/emory/mathcs/nlp/zzz/NEGazetteerCreate.class */
/**
 * Builds gazetteer files by merging two line-oriented name lists (a primary list and a
 * list of redirect names) into one sorted, de-duplicated list of tokenised entries.
 *
 * <p>Every input line is tokenised, cleaned by {@link #trimTokens(List, boolean)} and
 * stored as its tokens joined by a single space.
 */
public class NEGazetteerCreate {
    /** Tokenizer applied to every input line before the entry is stored. */
    Tokenizer tokenizer = new EnglishTokenizer();

    /**
     * Reads both streams into one set of normalised entries. Redirect splitting
     * ({@link #splitRedirect(String)}) is applied to the second stream only.
     * Both streams are closed before this method returns, even on failure.
     *
     * @param inputStream            primary name list, one entry per line
     * @param inputStream2           redirect name list, one entry per line
     * @param skipQualified          if true, lines containing {@code ":"} or {@code " of "} are ignored
     * @param singleTokenNeedsPeriod if true, single-token entries are kept only when they contain a period
     * @return the union of the entries extracted from both streams
     * @throws Exception if reading or tokenising fails
     */
    public Set<String> union(InputStream inputStream, InputStream inputStream2, boolean skipQualified, boolean singleTokenNeedsPeriod) throws Exception {
        Set<String> entries = new HashSet<>();
        // read() closes each stream on its own; this outer scope guarantees the second
        // stream is released too when the first read fails.
        try (InputStream primary = inputStream; InputStream redirects = inputStream2) {
            read(primary, entries, false, skipQualified, singleTokenNeedsPeriod);
            read(redirects, entries, true, skipQualified, singleTokenNeedsPeriod);
        }
        return entries;
    }

    /**
     * Reads {@code inputStream} line by line and adds one normalised entry per accepted
     * line to {@code set}. After the last line the current size of {@code set} is
     * printed to standard output. The stream is always closed.
     *
     * @param inputStream            source of the names, one per line
     * @param set                    receives the normalised entries
     * @param splitRedirects         if true, each line is first passed through {@link #splitRedirect(String)}
     * @param skipQualified          if true, lines containing {@code ":"} or {@code " of "} are ignored
     * @param singleTokenNeedsPeriod forwarded to {@link #trimTokens(List, boolean)}
     * @throws Exception if reading or tokenising fails
     */
    public void read(InputStream inputStream, Set<String> set, boolean splitRedirects, boolean skipQualified, boolean singleTokenNeedsPeriod) throws Exception {
        try (BufferedReader reader = IOUtils.createBufferedReader(inputStream)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String entry = line.trim();
                if (skipQualified && (entry.contains(":") || entry.contains(" of "))) {
                    continue;
                }
                if (splitRedirects) {
                    entry = splitRedirect(entry);
                }
                // The token list is mutated below, so collect into an explicitly mutable
                // ArrayList rather than relying on whatever Collectors.toList() returns.
                List<String> tokens = this.tokenizer.tokenize(entry).stream()
                        .map(node -> node.getWordForm())
                        .collect(Collectors.toCollection(ArrayList::new));
                concatPeriod(tokens);
                trimTokens(tokens, singleTokenNeedsPeriod);
                if (!tokens.isEmpty()) {
                    set.add(Joiner.join(tokens, " "));
                }
            }
            System.out.println(set.size());
        }
    }

    /**
     * Re-attaches a trailing period that was split off an abbreviation: when the list is
     * exactly two tokens, the second is {@code "."} and the first already contains a
     * period, the two are merged (e.g. {@code ["U.S", "."]} becomes {@code ["U.S."]}).
     */
    private void concatPeriod(List<String> list) {
        if (list.size() != 2) {
            return;
        }
        String head = list.get(0);
        if (list.get(1).equals(".") && head.contains(".")) {
            list.set(0, head + ".");
            list.remove(1);
        }
    }

    /**
     * Inserts {@link StringConst#SPACE} at lower-case to upper-case transitions of a
     * name that contains no space yet. No separator is inserted before the first or
     * the last character.
     *
     * @param str the name to split
     * @return {@code str} unchanged if it already contains {@code StringConst.SPACE},
     *         otherwise the name with separators inserted
     */
    public String splitRedirect(String str) {
        if (str.contains(StringConst.SPACE)) {
            return str;
        }
        char[] chars = str.toCharArray();
        int last = chars.length - 1;
        StringBuilder sb = new StringBuilder(chars.length + 4);
        for (int i = 0; i < chars.length; i++) {
            boolean interior = 0 < i && i < last;
            if (interior && CharUtils.isLowerCase(chars[i - 1]) && CharUtils.isUpperCase(chars[i])) {
                sb.append(StringConst.SPACE);
            }
            sb.append(chars[i]);
        }
        return sb.toString();
    }

    /**
     * Cleans a token list in place:
     * <ol>
     *   <li>removes the first bracketed group (see {@link #removeFirstParenthetical(List)});</li>
     *   <li>removes leading and trailing tokens that consist of punctuation only;</li>
     *   <li>if exactly one token remains, clears the list when that token is digits only,
     *       or when {@code singleTokenNeedsPeriod} is set and the token has no period.</li>
     * </ol>
     *
     * @param list                   the mutable token list to clean
     * @param singleTokenNeedsPeriod whether a lone token must contain a period to be kept
     */
    public void trimTokens(List<String> list, boolean singleTokenNeedsPeriod) {
        // Must run before any iteration over the list starts: it structurally modifies
        // the list, which would invalidate an iterator created earlier.
        removeFirstParenthetical(list);

        int leading = 0;
        while (leading < list.size() && StringUtils.containsPunctuationOnly(list.get(leading))) {
            leading++;
        }
        list.subList(0, leading).clear();

        int end = list.size();
        while (end > 0 && StringUtils.containsPunctuationOnly(list.get(end - 1))) {
            end--;
        }
        list.subList(end, list.size()).clear();

        if (list.size() == 1) {
            String only = list.get(0);
            boolean lacksRequiredPeriod = singleTokenNeedsPeriod && !only.contains(".");
            if (lacksRequiredPeriod || StringUtils.containsDigitOnly(only)) {
                list.clear();
            }
        }
    }

    /**
     * Removes the span from the most recent {@link StringConst#LRB} token up to and
     * including the first following {@link StringConst#RRB} token. Only the first such
     * span is removed; a list without a matching pair is left untouched.
     */
    private static void removeFirstParenthetical(List<String> tokens) {
        int open = -1;
        for (int i = 0; i < tokens.size(); i++) {
            String token = tokens.get(i);
            if (token.equals(StringConst.LRB)) {
                open = i;
            } else if (open >= 0 && token.equals(StringConst.RRB)) {
                tokens.subList(open, i + 1).clear();
                return;
            }
        }
    }

    /**
     * Writes the entries of {@code set} to {@code outputStream} in natural (sorted)
     * order, one per line, and closes the stream.
     *
     * @param outputStream destination; closed by this method
     * @param set          entries to write
     */
    public void print(OutputStream outputStream, Set<String> set) {
        List<String> sorted = new ArrayList<>(set);
        Collections.sort(sorted);
        try (PrintStream out = IOUtils.createBufferedPrintStream(outputStream)) {
            for (String entry : sorted) {
                out.println(entry);
            }
        }
    }

    /**
     * Creates {@code basePath + ".union"} from {@code basePath + ".txt"} and
     * {@code basePath + "Redirects.txt"}. The output file is opened only after both
     * inputs were read successfully.
     */
    private static void createUnion(NEGazetteerCreate creator, String basePath, boolean skipQualified, boolean singleTokenNeedsPeriod) throws Exception {
        System.out.println(basePath);
        Set<String> entries;
        try (InputStream names = IOUtils.createFileInputStream(basePath + ".txt");
             InputStream redirects = IOUtils.createFileInputStream(basePath + "Redirects.txt")) {
            entries = creator.union(names, redirects, skipQualified, singleTokenNeedsPeriod);
        }
        creator.print(IOUtils.createFileOutputStream(basePath + ".union"), entries);
    }

    /**
     * Generates the {@code .union} gazetteer for every category found in the directory
     * given as the first argument.
     *
     * @param strArr {@code strArr[0]} is the directory containing the input lists
     * @throws IllegalArgumentException if no directory is given
     * @throws Exception if reading, tokenising or writing fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr == null || strArr.length == 0) {
            throw new IllegalArgumentException("usage: NEGazetteerCreate <directory>");
        }
        String dir = strArr[0];
        NEGazetteerCreate creator = new NEGazetteerCreate();
        createUnion(creator, dir + "/WikiArtWork", false, true);
        createUnion(creator, dir + "/WikiFilms", false, true);
        createUnion(creator, dir + "/WikiSongs", false, true);
        createUnion(creator, dir + "/WikiManMadeObjectNames", false, false);
        createUnion(creator, dir + "/WikiCompetitionsBattlesEvents", false, false);
        createUnion(creator, dir + "/WikiLocations", true, false);
        createUnion(creator, dir + "/WikiOrganizations", true, false);
        createUnion(creator, dir + "/WikiPeople", true, false);
    }
}
