package org.fnlp.nlp.corpus;

import gnu.trove.map.hash.TCharIntHashMap;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Iterator;
import org.fnlp.util.MyFiles;

/* loaded from: input_file:org/fnlp/nlp/corpus/CorpusCount.class */
/**
 * Counts character occurrences in a text corpus.
 *
 * <p>Only lines that begin with {@code <content} are counted; the literal tags
 * {@code <contenttitle>}, {@code </contenttitle>}, {@code <content>} and
 * {@code </content>} are removed from such a line before its characters are tallied.
 * Counts are accumulated per UTF-16 {@code char} (as returned by
 * {@link String#charAt(int)}), not per Unicode code point.
 */
public class CorpusCount {
    /** Number of occurrences seen so far for each distinct character. */
    TCharIntHashMap charfreq = new TCharIntHashMap();

    /** Total number of characters counted so far. */
    int charnum = 0;

    /**
     * Counts the characters of a hard-coded corpus location and prints the summary.
     *
     * @param strArr command-line arguments (unused)
     * @throws Exception if the corpus cannot be read
     */
    public static void main(String[] strArr) throws Exception {
        CorpusCount corpusCount = new CorpusCount();
        corpusCount.countChar("D:/wordcluster/SogouCA", "GBK");
        // toString() already ends with a newline, so print() rather than println().
        System.out.print(corpusCount.toString());
    }

    /**
     * Accumulates character counts for a file, or for every file that
     * {@code MyFiles.getAllFiles(str, null)} returns when {@code str} is a directory.
     *
     * @param str  path of a file or directory to process
     * @param str2 name of the charset used to decode the file(s)
     * @throws IOException if a file cannot be opened or read, or the charset is unsupported
     */
    private void countChar(String str, String str2) throws IOException {
        if (new File(str).isDirectory()) {
            for (File file : MyFiles.getAllFiles(str, null)) {
                countChar(file.toString(), str2);
            }
            return;
        }
        // Both resources are declared separately so the underlying stream is closed
        // even when the charset name is rejected by the InputStreamReader constructor.
        try (FileInputStream in = new FileInputStream(str);
                BufferedReader reader = new BufferedReader(new InputStreamReader(in, str2))) {
            int contentLines = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.startsWith("<content")) {
                    continue;
                }
                // Progress indicator: prints the per-file count of content lines seen so far.
                if (contentLines % 10000 == 0) {
                    System.out.println(contentLines);
                }
                contentLines++;
                String text = stripContentTags(line);
                for (int k = 0; k < text.length(); k++) {
                    this.charfreq.adjustOrPutValue(text.charAt(k), 1, 1);
                    this.charnum++;
                }
            }
        }
    }

    /**
     * Removes the content/contenttitle opening and closing tags from a line.
     * Strings are immutable, so the result of each replace must be kept.
     */
    private static String stripContentTags(String line) {
        return line.replace("<contenttitle>", "")
                .replace("</contenttitle>", "")
                .replace("<content>", "")
                .replace("</content>", "");
    }

    /**
     * Returns a two-line summary: the number of distinct characters and the
     * total number of characters counted, each line terminated by a newline.
     */
    @Override
    public String toString() {
        return "char type number:\t" + this.charfreq.size() + "\n"
                + "char number:\t" + this.charnum + "\n";
    }
}
