package org.fnlp.nlp.corpus.ctbconvert;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.fnlp.nlp.cn.ChineseTrans;
import org.fnlp.nlp.corpus.fnlp.FNLPCorpus;
import org.fnlp.nlp.corpus.fnlp.FNLPDoc;
import org.fnlp.nlp.corpus.fnlp.FNLPSent;
import org.fnlp.util.MyCollection;
import org.fnlp.util.MyFiles;

/* loaded from: input_file:org/fnlp/nlp/corpus/ctbconvert/CoNLL2FNLP.class */
/**
 * Converts CoNLL-style, blank-line-separated sentence files into an {@link FNLPCorpus}.
 *
 * <p>Each input file becomes one {@link FNLPDoc}; each run of non-blank lines becomes one
 * {@link FNLPSent} whose words are normalized with {@link ChineseTrans} and whose tags and
 * relations are remapped through the static dictionaries loaded by {@link #main(String[])}.
 *
 * <p>The mapping dictionaries are static and are only populated by {@code main}; calling
 * {@link #correct(FNLPSent)} (directly or through {@link #read()}) before they are loaded
 * fails with a {@link NullPointerException}.
 */
public class CoNLL2FNLP {
    /** Passed through to {@code FNLPSent.parse} as its third argument; {@code main} sets it to true. */
    private static boolean HASID = false;
    /** Tag replacement table, keyed by the tag found in the input. */
    private static HashMap<String, String> posdict;
    /** Relation replacement table, keyed by the relation label found in the input. */
    private static HashMap<String, String> reldict;
    /** Per-word tag override, applied only to words currently tagged "专有名". */
    private static HashMap<String, String> NRdict;
    List<File> files;
    Charset charset;
    FNLPCorpus corpus;
    ChineseTrans ct;
    private TagCorrect tc;

    /**
     * Creates a converter that reads with the "UTF8" charset and no file filter argument.
     *
     * @param str path handed to {@code MyFiles.getAllFiles}
     * @throws IOException propagated from the three-argument constructor
     */
    public CoNLL2FNLP(String str) throws IOException {
        this(str, "UTF8", null);
    }

    /**
     * Creates a converter over the files returned by {@code MyFiles.getAllFiles(str, str3)}.
     *
     * @param str  path handed to {@code MyFiles.getAllFiles}
     * @param str2 name of the charset used to decode every input file
     * @param str3 second argument of {@code MyFiles.getAllFiles}; presumably a file-name
     *             suffix filter ({@code main} passes ".txt") - TODO confirm against MyFiles
     * @throws IOException declared by the original signature
     */
    public CoNLL2FNLP(String str, String str2, String str3) throws IOException {
        this.ct = new ChineseTrans();
        this.files = MyFiles.getAllFiles(str, str3);
        this.charset = Charset.forName(str2);
        this.tc = new TagCorrect();
    }

    /**
     * Reads every input file into a fresh {@link FNLPCorpus} stored in {@link #corpus}.
     *
     * <p>Lines are trimmed; an empty line terminates the current sentence. A sentence that is
     * still pending at end of file is flushed exactly like any other sentence.
     *
     * @throws IOException if a file cannot be opened or read; a missing file now surfaces as
     *                     {@link java.io.FileNotFoundException} instead of a later
     *                     {@link NullPointerException}
     */
    public void read() throws IOException {
        this.corpus = new FNLPCorpus();
        ArrayList<String> pending = new ArrayList<String>();
        for (File file : this.files) {
            FNLPDoc doc = new FNLPDoc();
            doc.name = file.getName();
            // Never let an unterminated sentence leak from one file into the next.
            pending.clear();
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(new FileInputStream(file), this.charset));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    String trimmed = line.trim();
                    if (trimmed.isEmpty()) {
                        flushSentence(pending, doc);
                    } else {
                        pending.add(trimmed);
                    }
                }
            } finally {
                // Release the file handle even when reading or parsing fails.
                reader.close();
            }
            // The last sentence of a file may not be followed by a blank line.
            flushSentence(pending, doc);
            this.corpus.add(doc);
        }
    }

    /**
     * Turns the buffered lines into one sentence, appends it to {@code doc} and empties the
     * buffer. Does nothing when the buffer is empty.
     */
    private void flushSentence(ArrayList<String> pending, FNLPDoc doc) {
        if (pending.isEmpty()) {
            return;
        }
        FNLPSent sent = new FNLPSent();
        sent.parse(pending, 1, HASID);
        for (int i = 0; i < sent.words.length; i++) {
            sent.words[i] = this.ct.normalize(sent.words[i]);
        }
        correct(sent);
        doc.add(sent);
        pending.clear();
    }

    /**
     * Rewrites the tags and relations of {@code fNLPSent} in place.
     *
     * <p>For each token: the tag is replaced by its {@code posdict} entry when one exists; a
     * token whose (possibly replaced) tag is "专有名" takes the {@code NRdict} entry for its
     * word when one exists; the relation is replaced by its {@code reldict} entry when one
     * exists; finally {@code TagCorrect.checkPronoun} is invoked for the token.
     *
     * @param fNLPSent sentence to correct; its {@code words}, {@code tags} and
     *                 {@code relations} arrays are indexed in parallel up to {@code tags.length}
     */
    public void correct(FNLPSent fNLPSent) {
        for (int i = 0; i < fNLPSent.tags.length; i++) {
            String mappedTag = posdict.get(fNLPSent.tags[i]);
            if (mappedTag != null) {
                fNLPSent.tags[i] = mappedTag;
            }
            String properNounTag = NRdict.get(fNLPSent.words[i]);
            // Constant-first comparison so a missing tag is skipped instead of throwing.
            if (properNounTag != null && "专有名".equals(fNLPSent.tags[i])) {
                fNLPSent.tags[i] = properNounTag;
            }
            String mappedRelation = reldict.get(fNLPSent.relations[i]);
            if (mappedRelation != null) {
                fNLPSent.relations[i] = mappedRelation;
            }
            this.tc.checkPronoun(fNLPSent.words, fNLPSent.tags, i);
        }
    }

    /**
     * Command-line entry point: loads the three mapping tables, converts
     * {@code ./data/ctb/result.txt} and writes the corpus, its counts and the collected
     * pronoun data under {@code ./data/FNLPDATA/}.
     *
     * @param strArr ignored
     * @throws IOException if any mapping table, input file or output file cannot be processed
     */
    public static void main(String[] strArr) throws IOException {
        posdict = MyCollection.loadStringStringMap("./data/map/pos-ctb2fnlp.txt");
        reldict = MyCollection.loadStringStringMap("./data/map/rel-ctb2fnlp.txt");
        NRdict = MyCollection.loadStringStringMap("./data/map/pos-nr.txt");
        CoNLL2FNLP coNLL2FNLP = new CoNLL2FNLP("./data/ctb/result.txt", "utf-8", ".txt");
        HASID = true;
        coNLL2FNLP.read();
        coNLL2FNLP.corpus.writeOne("./data/FNLPDATA/ctb7.dat");
        coNLL2FNLP.corpus.count("./data/FNLPDATA/count", false);
        MyCollection.write(coNLL2FNLP.tc.pronount, "./data/FNLPDATA/pronount.txt");
        System.out.println("Done!");
    }
}
